Activate renv to create renv.lock file to track dependencies for data reproducibility
renv::init()
If downloading project from github and want to restore previous dependencies used for analyses run this line
renv::restore()
Download packages so stored in renv.lock file
# To create Pdf
install.packages("knitr")
# Importing Data
install.packages("haven")
install.packages("readxl")
# Table specific
install.packages("xtable")
install.packages("kableExtra")
# Data Cleaning and plotting
install.packages("tidyverse")
install.packages("dplyr")
install.packages("plyr")
install.packages("ggplot2")
install.packages("scales")
install.packages("gridExtra")
# Skewness install.packages
install.packages("e1071")
# Matrix operations
install.packages("matrixStats")
# Mixed Models
install.packages("lme4")
install.packages("nlme")
#install.packages("HLMdiag") # Model diagnostics
# To read the lables
install.packages("Hmisc")
# to order the data
install.packages("doBy")
# to re-code variables
install.packages("rockchalk")
# for reshape
install.packages("MASS")
# descriptive and more
install.packages("psych")
# correlogram plot
install.packages("corrplot")
#glmer diagnostics
install.packages("fitdistrplus")
#outlier analysis and R^2
install.packages("performance")
install.packages("rptR")
# Creating stratified tables
install.packages("arsenal")
#For zero-inflated negative binomial modeling/model checking
install.packages("remotes")
library(remotes)
install_github("nyiuab/NBZIMM", force=T, build_vignettes=F)
install.packages("philentropy")
Load in libraries
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## ------------------------------------------------------------------------------
##
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
##
## ------------------------------------------------------------------------------
##
##
## Attaching package: 'plyr'
##
##
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
##
## The following object is masked from 'package:purrr':
##
## compact
##
##
##
## Attaching package: 'scales'
##
##
## The following object is masked from 'package:purrr':
##
## discard
##
##
## The following object is masked from 'package:readr':
##
## col_factor
##
##
##
## Attaching package: 'gridExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## combine
##
##
##
## Attaching package: 'matrixStats'
##
##
## The following object is masked from 'package:plyr':
##
## count
##
##
## The following object is masked from 'package:dplyr':
##
## count
##
##
## Loading required package: Matrix
##
##
## Attaching package: 'Matrix'
##
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
##
## Attaching package: 'nlme'
##
##
## The following object is masked from 'package:lme4':
##
## lmList
##
##
## The following object is masked from 'package:dplyr':
##
## collapse
##
##
##
## Attaching package: 'Hmisc'
##
##
## The following object is masked from 'package:e1071':
##
## impute
##
##
## The following objects are masked from 'package:plyr':
##
## is.discrete, summarize
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:xtable':
##
## label, label<-
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
##
## Attaching package: 'doBy'
##
##
## The following object is masked from 'package:dplyr':
##
## order_by
##
##
##
## Attaching package: 'rockchalk'
##
##
## The following object is masked from 'package:Hmisc':
##
## summarize
##
##
## The following objects are masked from 'package:e1071':
##
## kurtosis, skewness
##
##
## The following object is masked from 'package:plyr':
##
## summarize
##
##
## The following object is masked from 'package:dplyr':
##
## summarize
##
##
##
## Attaching package: 'MASS'
##
##
## The following object is masked from 'package:rockchalk':
##
## mvrnorm
##
##
## The following object is masked from 'package:dplyr':
##
## select
##
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:scales':
##
## alpha, rescale
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
## corrplot 0.92 loaded
##
## Loading required package: survival
##
##
## Attaching package: 'performance'
##
##
## The following object is masked from 'package:xtable':
##
## display
##
##
##
## Attaching package: 'arsenal'
##
##
## The following object is masked from 'package:Hmisc':
##
## %nin%
##
##
## The following object is masked from 'package:matrixStats':
##
## iqr
##
##
## The following object is masked from 'package:scales':
##
## ordinal
##
##
## The following object is masked from 'package:lubridate':
##
## is.Date
##
##
##
## Attaching package: 'NBZIMM'
##
##
## The following object is masked from 'package:psych':
##
## sim
##
##
## The following object is masked from 'package:stringr':
##
## fixed
##
##
##
## Attaching package: 'philentropy'
##
##
## The following objects are masked from 'package:psych':
##
## distance, manhattan, minkowski
Note that the data cleaning and exploration for this analysis is in a separate file written by Hedyeh Ahmadi.
HEI_Aim2_Long <- read.csv("HEI_Aim2_Long.csv")
HEI_Aim2_Wide <- read.csv("HEI_Aim2_Wide.csv")
HEI_Aim2_Long_1KidPerFamily <- read.csv("HEI_Aim2_Long_1KidPerFamily.csv")
dim(HEI_Aim2_Long)
## [1] 30419 56
dim(HEI_Aim2_Wide)
## [1] 11866 140
dim(HEI_Aim2_Long_1KidPerFamily)
## [1] 25125 56
names(HEI_Aim2_Long)
## [1] "X" "subjectid"
## [3] "rel_family_id" "abcd_site"
## [5] "eventname" "rel_relationship"
## [7] "interview_date" "demo_l_p_select_language___1"
## [9] "cbcl_select_language___1" "rel_group_id"
## [11] "rel_ingroup_order" "rel_same_sex"
## [13] "reshist_addr1_pm252016aa" "sex"
## [15] "interview_age" "race_ethnicity"
## [17] "high.educ" "reshist_addr1_adi_perc"
## [19] "reshist_addr1_adi_wsum" "overall.income.b"
## [21] "overall.income.l" "overall.income.alltp"
## [23] "prnt.empl.bl" "prnt.empl.l"
## [25] "prnt.empl.alltp" "neighb_phenx_avg_p"
## [27] "neighb_phenx_sum_p" "reshist_addr1_popdensity"
## [29] "reshist_addr1_proxrd" "married"
## [31] "married.or.livingtogether" "cbcl_scr_syn_internal_r"
## [33] "cbcl_scr_syn_external_r" "cbcl_scr_syn_totprob_r"
## [35] "cbcl_scr_syn_anxdep_r" "cbcl_scr_syn_withdep_r"
## [37] "cbcl_scr_syn_attention_r" "cbcl_scr_syn_rulebreak_r"
## [39] "cbcl_scr_syn_aggressive_r" "cbcl_scr_syn_internal_t"
## [41] "cbcl_scr_syn_external_t" "cbcl_scr_syn_totprob_t"
## [43] "cbcl_scr_syn_anxdep_t" "cbcl_scr_syn_withdep_t"
## [45] "cbcl_scr_syn_attention_t" "cbcl_scr_syn_rulebreak_t"
## [47] "cbcl_scr_syn_aggressive_t" "reshist_addr1_years"
## [49] "income_midp" "demo_comb_income_v2"
## [51] "reshist_addr1_no2_2016_aavg" "reshist_addr1_o3_2016_annavg"
## [53] "reshist_addr1_pm252016aa_bl" "reshist_addr1_no2_2016_aavg_bl"
## [55] "reshist_addr1_o3_2016_annavg_bl" "high.educ_bl"
names(HEI_Aim2_Wide)
## [1] "X"
## [2] "subjectid"
## [3] "rel_family_id"
## [4] "rel_group_id"
## [5] "rel_same_sex"
## [6] "sex"
## [7] "race_ethnicity"
## [8] "reshist_addr1_pm252016aa.Baseline"
## [9] "abcd_site.Baseline"
## [10] "interview_date.Baseline"
## [11] "rel_relationship.Baseline"
## [12] "rel_ingroup_order.Baseline"
## [13] "high.educ.Baseline"
## [14] "interview_age.Baseline"
## [15] "demo_l_p_select_language___1.Baseline"
## [16] "cbcl_select_language___1.Baseline"
## [17] "reshist_addr1_adi_perc.Baseline"
## [18] "reshist_addr1_adi_wsum.Baseline"
## [19] "overall.income.b.Baseline"
## [20] "overall.income.l.Baseline"
## [21] "overall.income.alltp.Baseline"
## [22] "prnt.empl.bl.Baseline"
## [23] "prnt.empl.l.Baseline"
## [24] "prnt.empl.alltp.Baseline"
## [25] "neighb_phenx_avg_p.Baseline"
## [26] "neighb_phenx_sum_p.Baseline"
## [27] "reshist_addr1_popdensity.Baseline"
## [28] "reshist_addr1_proxrd.Baseline"
## [29] "married.Baseline"
## [30] "married.or.livingtogether.Baseline"
## [31] "cbcl_scr_syn_internal_r.Baseline"
## [32] "cbcl_scr_syn_external_r.Baseline"
## [33] "cbcl_scr_syn_totprob_r.Baseline"
## [34] "cbcl_scr_syn_anxdep_r.Baseline"
## [35] "cbcl_scr_syn_withdep_r.Baseline"
## [36] "cbcl_scr_syn_attention_r.Baseline"
## [37] "cbcl_scr_syn_rulebreak_r.Baseline"
## [38] "cbcl_scr_syn_aggressive_r.Baseline"
## [39] "cbcl_scr_syn_internal_t.Baseline"
## [40] "cbcl_scr_syn_external_t.Baseline"
## [41] "cbcl_scr_syn_totprob_t.Baseline"
## [42] "cbcl_scr_syn_anxdep_t.Baseline"
## [43] "cbcl_scr_syn_withdep_t.Baseline"
## [44] "cbcl_scr_syn_attention_t.Baseline"
## [45] "cbcl_scr_syn_rulebreak_t.Baseline"
## [46] "cbcl_scr_syn_aggressive_t.Baseline"
## [47] "reshist_addr1_years.Baseline"
## [48] "income_midp.Baseline"
## [49] "demo_comb_income_v2.Baseline"
## [50] "reshist_addr1_no2_2016_aavg.Baseline"
## [51] "reshist_addr1_o3_2016_annavg.Baseline"
## [52] "reshist_addr1_pm252016aa.1.year"
## [53] "abcd_site.1.year"
## [54] "interview_date.1.year"
## [55] "rel_relationship.1.year"
## [56] "rel_ingroup_order.1.year"
## [57] "high.educ.1.year"
## [58] "interview_age.1.year"
## [59] "demo_l_p_select_language___1.1.year"
## [60] "cbcl_select_language___1.1.year"
## [61] "reshist_addr1_adi_perc.1.year"
## [62] "reshist_addr1_adi_wsum.1.year"
## [63] "overall.income.b.1.year"
## [64] "overall.income.l.1.year"
## [65] "overall.income.alltp.1.year"
## [66] "prnt.empl.bl.1.year"
## [67] "prnt.empl.l.1.year"
## [68] "prnt.empl.alltp.1.year"
## [69] "neighb_phenx_avg_p.1.year"
## [70] "neighb_phenx_sum_p.1.year"
## [71] "reshist_addr1_popdensity.1.year"
## [72] "reshist_addr1_proxrd.1.year"
## [73] "married.1.year"
## [74] "married.or.livingtogether.1.year"
## [75] "cbcl_scr_syn_internal_r.1.year"
## [76] "cbcl_scr_syn_external_r.1.year"
## [77] "cbcl_scr_syn_totprob_r.1.year"
## [78] "cbcl_scr_syn_anxdep_r.1.year"
## [79] "cbcl_scr_syn_withdep_r.1.year"
## [80] "cbcl_scr_syn_attention_r.1.year"
## [81] "cbcl_scr_syn_rulebreak_r.1.year"
## [82] "cbcl_scr_syn_aggressive_r.1.year"
## [83] "cbcl_scr_syn_internal_t.1.year"
## [84] "cbcl_scr_syn_external_t.1.year"
## [85] "cbcl_scr_syn_totprob_t.1.year"
## [86] "cbcl_scr_syn_anxdep_t.1.year"
## [87] "cbcl_scr_syn_withdep_t.1.year"
## [88] "cbcl_scr_syn_attention_t.1.year"
## [89] "cbcl_scr_syn_rulebreak_t.1.year"
## [90] "cbcl_scr_syn_aggressive_t.1.year"
## [91] "reshist_addr1_years.1.year"
## [92] "income_midp.1.year"
## [93] "demo_comb_income_v2.1.year"
## [94] "reshist_addr1_no2_2016_aavg.1.year"
## [95] "reshist_addr1_o3_2016_annavg.1.year"
## [96] "reshist_addr1_pm252016aa.2.year"
## [97] "abcd_site.2.year"
## [98] "interview_date.2.year"
## [99] "rel_relationship.2.year"
## [100] "rel_ingroup_order.2.year"
## [101] "high.educ.2.year"
## [102] "interview_age.2.year"
## [103] "demo_l_p_select_language___1.2.year"
## [104] "cbcl_select_language___1.2.year"
## [105] "reshist_addr1_adi_perc.2.year"
## [106] "reshist_addr1_adi_wsum.2.year"
## [107] "overall.income.b.2.year"
## [108] "overall.income.l.2.year"
## [109] "overall.income.alltp.2.year"
## [110] "prnt.empl.bl.2.year"
## [111] "prnt.empl.l.2.year"
## [112] "prnt.empl.alltp.2.year"
## [113] "neighb_phenx_avg_p.2.year"
## [114] "neighb_phenx_sum_p.2.year"
## [115] "reshist_addr1_popdensity.2.year"
## [116] "reshist_addr1_proxrd.2.year"
## [117] "married.2.year"
## [118] "married.or.livingtogether.2.year"
## [119] "cbcl_scr_syn_internal_r.2.year"
## [120] "cbcl_scr_syn_external_r.2.year"
## [121] "cbcl_scr_syn_totprob_r.2.year"
## [122] "cbcl_scr_syn_anxdep_r.2.year"
## [123] "cbcl_scr_syn_withdep_r.2.year"
## [124] "cbcl_scr_syn_attention_r.2.year"
## [125] "cbcl_scr_syn_rulebreak_r.2.year"
## [126] "cbcl_scr_syn_aggressive_r.2.year"
## [127] "cbcl_scr_syn_internal_t.2.year"
## [128] "cbcl_scr_syn_external_t.2.year"
## [129] "cbcl_scr_syn_totprob_t.2.year"
## [130] "cbcl_scr_syn_anxdep_t.2.year"
## [131] "cbcl_scr_syn_withdep_t.2.year"
## [132] "cbcl_scr_syn_attention_t.2.year"
## [133] "cbcl_scr_syn_rulebreak_t.2.year"
## [134] "cbcl_scr_syn_aggressive_t.2.year"
## [135] "reshist_addr1_years.2.year"
## [136] "income_midp.2.year"
## [137] "demo_comb_income_v2.2.year"
## [138] "reshist_addr1_no2_2016_aavg.2.year"
## [139] "reshist_addr1_o3_2016_annavg.2.year"
## [140] "rel_relationship.1_year"
names(HEI_Aim2_Long_1KidPerFamily)
## [1] "X" "subjectid"
## [3] "rel_family_id" "abcd_site"
## [5] "eventname" "rel_relationship"
## [7] "interview_date" "demo_l_p_select_language___1"
## [9] "cbcl_select_language___1" "rel_group_id"
## [11] "rel_ingroup_order" "rel_same_sex"
## [13] "reshist_addr1_pm252016aa" "sex"
## [15] "interview_age" "race_ethnicity"
## [17] "high.educ" "reshist_addr1_adi_perc"
## [19] "reshist_addr1_adi_wsum" "overall.income.b"
## [21] "overall.income.l" "overall.income.alltp"
## [23] "prnt.empl.bl" "prnt.empl.l"
## [25] "prnt.empl.alltp" "neighb_phenx_avg_p"
## [27] "neighb_phenx_sum_p" "reshist_addr1_popdensity"
## [29] "reshist_addr1_proxrd" "married"
## [31] "married.or.livingtogether" "cbcl_scr_syn_internal_r"
## [33] "cbcl_scr_syn_external_r" "cbcl_scr_syn_totprob_r"
## [35] "cbcl_scr_syn_anxdep_r" "cbcl_scr_syn_withdep_r"
## [37] "cbcl_scr_syn_attention_r" "cbcl_scr_syn_rulebreak_r"
## [39] "cbcl_scr_syn_aggressive_r" "cbcl_scr_syn_internal_t"
## [41] "cbcl_scr_syn_external_t" "cbcl_scr_syn_totprob_t"
## [43] "cbcl_scr_syn_anxdep_t" "cbcl_scr_syn_withdep_t"
## [45] "cbcl_scr_syn_attention_t" "cbcl_scr_syn_rulebreak_t"
## [47] "cbcl_scr_syn_aggressive_t" "reshist_addr1_years"
## [49] "income_midp" "demo_comb_income_v2"
## [51] "reshist_addr1_no2_2016_aavg" "reshist_addr1_o3_2016_annavg"
## [53] "reshist_addr1_pm252016aa_bl" "reshist_addr1_no2_2016_aavg_bl"
## [55] "reshist_addr1_o3_2016_annavg_bl" "high.educ_bl"
# Note we are keeping all families but choosing one kid per family
length(unique(HEI_Aim2_Long$rel_family_id))
## [1] 9844
length(unique(HEI_Aim2_Long$subjectid)) # matches number of rows of wide data :)
## [1] 11866
length(unique(HEI_Aim2_Long_1KidPerFamily$rel_family_id))
## [1] 9844
length(unique(HEI_Aim2_Long_1KidPerFamily$subjectid))
## [1] 9844
#rename so can use later
names(HEI_Aim2_Long)[names(HEI_Aim2_Long) == 'prnt.empl.bl'] <- 'prnt.empl.b'
#create dataset for table and comparison
baseline_vars <- subset(HEI_Aim2_Long, HEI_Aim2_Long$eventname=="Baseline", select = c("subjectid", "sex", "race_ethnicity", "high.educ", "neighb_phenx_avg_p", "overall.income.b", "prnt.empl.b"))
#rename variables
names(baseline_vars)[names(baseline_vars) == 'sex'] <- 'sex.bl'
names(baseline_vars)[names(baseline_vars) == 'race_ethnicity'] <- 'race_ethnicity.bl'
names(baseline_vars)[names(baseline_vars) == 'high.educ'] <- 'high.educ.bl'
names(baseline_vars)[names(baseline_vars) == 'neighb_phenx_avg_p'] <- 'neighb_phenx_avg_p.bl'
names(baseline_vars)[names(baseline_vars) == 'overall.income.b'] <- 'overall.income.bl'
names(baseline_vars)[names(baseline_vars) == 'prnt.empl.b'] <- 'prnt.empl.bl'
#add to initial df
HEI_Aim2_Long_2 <- merge(HEI_Aim2_Long, baseline_vars, by="subjectid")
#factor eventname
HEI_Aim2_Long_2$eventname <- as.factor(HEI_Aim2_Long_2$eventname)
HEI_Aim2_Long_2$eventname <- relevel(HEI_Aim2_Long_2$eventname , ref="Baseline")
#create smaller df
df_prior <- subset(HEI_Aim2_Long_2,select=c("subjectid","abcd_site","eventname","interview_age","reshist_addr1_pm252016aa_bl","prnt.empl.bl","overall.income.bl","sex.bl","race_ethnicity.bl","high.educ.bl","neighb_phenx_avg_p.bl","cbcl_scr_syn_internal_r","cbcl_scr_syn_external_r","cbcl_scr_syn_anxdep_r","cbcl_scr_syn_withdep_r","cbcl_scr_syn_attention_r","cbcl_scr_syn_rulebreak_r","cbcl_scr_syn_aggressive_r","cbcl_scr_syn_totprob_r"))
#create table
des_table_prior <- tableby(eventname ~ ., data = df_prior[ , -which(names(df_prior) %in% c("subjectid"))], total=F)
summary(des_table_prior, title = "Descriptive Statistics by Eventname Before Cleaning")
##
##
## Table: Descriptive Statistics by Eventname Before Cleaning
##
## | | Baseline (N=11839) | 1-year (N=11200) | 2-year (N=7334) | p value|
## |:----------------------------------------|:------------------:|:-----------------:|:-----------------:|-------:|
## |**abcd_site** | | | | < 0.001|
## | site01 | 406 (3.4%) | 369 (3.3%) | 210 (2.9%) | |
## | site02 | 558 (4.7%) | 548 (4.9%) | 351 (4.8%) | |
## | site03 | 631 (5.3%) | 563 (5.0%) | 372 (5.1%) | |
## | site04 | 745 (6.3%) | 727 (6.5%) | 534 (7.3%) | |
## | site05 | 378 (3.2%) | 357 (3.2%) | 234 (3.2%) | |
## | site06 | 584 (4.9%) | 568 (5.1%) | 379 (5.2%) | |
## | site07 | 339 (2.9%) | 322 (2.9%) | 116 (1.6%) | |
## | site08 | 350 (3.0%) | 339 (3.0%) | 212 (2.9%) | |
## | site09 | 433 (3.7%) | 393 (3.5%) | 227 (3.1%) | |
## | site10 | 739 (6.2%) | 708 (6.3%) | 493 (6.7%) | |
## | site11 | 450 (3.8%) | 400 (3.6%) | 197 (2.7%) | |
## | site12 | 604 (5.1%) | 550 (4.9%) | 274 (3.7%) | |
## | site13 | 728 (6.1%) | 691 (6.2%) | 443 (6.0%) | |
## | site14 | 606 (5.1%) | 583 (5.2%) | 430 (5.9%) | |
## | site15 | 458 (3.9%) | 426 (3.8%) | 266 (3.6%) | |
## | site16 | 1011 (8.5%) | 979 (8.7%) | 640 (8.7%) | |
## | site17 | 578 (4.9%) | 562 (5.0%) | 374 (5.1%) | |
## | site18 | 384 (3.2%) | 376 (3.4%) | 223 (3.0%) | |
## | site19 | 550 (4.6%) | 521 (4.7%) | 397 (5.4%) | |
## | site20 | 707 (6.0%) | 687 (6.1%) | 528 (7.2%) | |
## | site21 | 600 (5.1%) | 531 (4.7%) | 434 (5.9%) | |
## |**interview_age** | | | | < 0.001|
## | Mean (SD) | 118.967 (7.495) | 131.073 (7.714) | 143.361 (7.747) | |
## | Range | 107.000 - 133.000 | 116.000 - 149.000 | 127.000 - 164.000 | |
## |**reshist_addr1_pm252016aa_bl** | | | | 0.745|
## | N-Miss | 651 | 587 | 224 | |
## | Mean (SD) | 7.663 (1.563) | 7.648 (1.561) | 7.650 (1.535) | |
## | Range | 1.722 - 15.902 | 1.722 - 15.902 | 1.722 - 15.902 | |
## |**prnt.empl.bl** | | | | 0.098|
## | N-Miss | 56 | 47 | 22 | |
## | Employed | 8194 (69.5%) | 7826 (70.2%) | 5214 (71.3%) | |
## | Other | 855 (7.3%) | 791 (7.1%) | 474 (6.5%) | |
## | Stay at Home Parent | 2065 (17.5%) | 1941 (17.4%) | 1262 (17.3%) | |
## | Unemployed | 669 (5.7%) | 595 (5.3%) | 362 (5.0%) | |
## |**overall.income.bl** | | | | 0.001|
## | N-Miss | 2 | 1 | 0 | |
## | [<50k] | 3215 (27.2%) | 2930 (26.2%) | 1833 (25.0%) | |
## | [>=100K] | 4544 (38.4%) | 4419 (39.5%) | 2952 (40.3%) | |
## | [>=50K & <100K] | 3065 (25.9%) | 2937 (26.2%) | 2000 (27.3%) | |
## | [Don't Know or Refuse] | 1013 (8.6%) | 913 (8.2%) | 549 (7.5%) | |
## |**sex.bl** | | | | 0.906|
## | Female | 5658 (47.8%) | 5335 (47.6%) | 3481 (47.5%) | |
## | Male | 6181 (52.2%) | 5865 (52.4%) | 3853 (52.5%) | |
## |**race_ethnicity.bl** | | | | < 0.001|
## | N-Miss | 2 | 2 | 0 | |
## | Asian | 250 (2.1%) | 239 (2.1%) | 158 (2.2%) | |
## | Black | 1777 (15.0%) | 1594 (14.2%) | 874 (11.9%) | |
## | Hispanic | 2405 (20.3%) | 2220 (19.8%) | 1411 (19.2%) | |
## | Other | 1243 (10.5%) | 1171 (10.5%) | 724 (9.9%) | |
## | White | 6162 (52.1%) | 5974 (53.3%) | 4167 (56.8%) | |
## |**high.educ.bl** | | | | < 0.001|
## | N-Miss | 14 | 12 | 10 | |
## | < HS Diploma | 592 (5.0%) | 526 (4.7%) | 306 (4.2%) | |
## | Bachelor | 3006 (25.4%) | 2889 (25.8%) | 2002 (27.3%) | |
## | HS Diploma/GED | 1129 (9.5%) | 1007 (9.0%) | 568 (7.8%) | |
## | Post Graduate Degree | 4025 (34.0%) | 3919 (35.0%) | 2611 (35.6%) | |
## | Some College | 3073 (26.0%) | 2847 (25.4%) | 1837 (25.1%) | |
## |**neighb_phenx_avg_p.bl** | | | | 0.003|
## | N-Miss | 8 | 5 | 3 | |
## | Mean (SD) | 3.890 (0.975) | 3.903 (0.969) | 3.938 (0.942) | |
## | Range | 1.000 - 5.000 | 1.000 - 5.000 | 1.000 - 5.000 | |
## |**cbcl_scr_syn_internal_r** | | | | 0.100|
## | N-Miss | 8 | 18 | 5 | |
## | Mean (SD) | 5.043 (5.522) | 5.108 (5.551) | 4.930 (5.614) | |
## | Range | 0.000 - 51.000 | 0.000 - 48.000 | 0.000 - 50.000 | |
## |**cbcl_scr_syn_external_r** | | | | < 0.001|
## | N-Miss | 8 | 18 | 5 | |
## | Mean (SD) | 4.455 (5.867) | 4.176 (5.656) | 3.918 (5.479) | |
## | Range | 0.000 - 49.000 | 0.000 - 47.000 | 0.000 - 46.000 | |
## |**cbcl_scr_syn_anxdep_r** | | | | < 0.001|
## | N-Miss | 8 | 18 | 5 | |
## | Mean (SD) | 2.516 (3.062) | 2.540 (3.072) | 2.322 (2.971) | |
## | Range | 0.000 - 26.000 | 0.000 - 22.000 | 0.000 - 22.000 | |
## |**cbcl_scr_syn_withdep_r** | | | | < 0.001|
## | N-Miss | 8 | 18 | 5 | |
## | Mean (SD) | 1.034 (1.709) | 1.116 (1.778) | 1.201 (1.901) | |
## | Range | 0.000 - 15.000 | 0.000 - 14.000 | 0.000 - 16.000 | |
## |**cbcl_scr_syn_attention_r** | | | | < 0.001|
## | N-Miss | 8 | 18 | 5 | |
## | Mean (SD) | 2.977 (3.495) | 2.858 (3.431) | 2.692 (3.298) | |
## | Range | 0.000 - 20.000 | 0.000 - 19.000 | 0.000 - 19.000 | |
## |**cbcl_scr_syn_rulebreak_r** | | | | < 0.001|
## | N-Miss | 8 | 18 | 5 | |
## | Mean (SD) | 1.192 (1.861) | 1.120 (1.822) | 1.057 (1.833) | |
## | Range | 0.000 - 20.000 | 0.000 - 20.000 | 0.000 - 23.000 | |
## |**cbcl_scr_syn_aggressive_r** | | | | < 0.001|
## | N-Miss | 8 | 18 | 5 | |
## | Mean (SD) | 3.262 (4.355) | 3.056 (4.185) | 2.861 (3.990) | |
## | Range | 0.000 - 36.000 | 0.000 - 33.000 | 0.000 - 32.000 | |
## |**cbcl_scr_syn_totprob_r** | | | | < 0.001|
## | N-Miss | 8 | 18 | 5 | |
## | Mean (SD) | 18.178 (17.968) | 17.520 (17.567) | 16.388 (17.001) | |
## | Range | 0.000 - 139.000 | 0.000 - 128.000 | 0.000 - 161.000 | |
The following variables are time-invariant, will use baseline covariates since PM2.5 collected at baseline: - reshist_addr1_pm252016aa_bl which is the Baseline PM2.5. - reshist_addr1_no2_2016_aavg_bl which is the Baseline NO2. - sex.bl - race_ethnicity.bl - high.educ.bl - prnt.empl.bl - neighb_phenx_avg_p.bl - overall.income.bl
The following variables are time-varying: - all CBCL outcomes - interview_age
#rename so can use later
names(HEI_Aim2_Long_1KidPerFamily)[names(HEI_Aim2_Long_1KidPerFamily) == 'prnt.empl.bl'] <- 'prnt.empl.b'
#create dataset for table and comparison
baseline_vars_1KidPerFamily <- subset(HEI_Aim2_Long_1KidPerFamily, HEI_Aim2_Long_1KidPerFamily$eventname=="Baseline", select = c("subjectid", "sex", "race_ethnicity", "high.educ", "neighb_phenx_avg_p", "overall.income.b"))
#rename variables
names(baseline_vars_1KidPerFamily)[names(baseline_vars_1KidPerFamily) == 'sex'] <- 'sex.bl'
names(baseline_vars_1KidPerFamily)[names(baseline_vars_1KidPerFamily) == 'race_ethnicity'] <- 'race_ethnicity.bl'
names(baseline_vars_1KidPerFamily)[names(baseline_vars_1KidPerFamily) == 'high.educ'] <- 'high.educ.bl'
names(baseline_vars_1KidPerFamily)[names(baseline_vars_1KidPerFamily) == 'neighb_phenx_avg_p'] <- 'neighb_phenx_avg_p.bl'
names(baseline_vars_1KidPerFamily)[names(baseline_vars_1KidPerFamily) == 'overall.income.b'] <- 'overall.income.bl'
names(baseline_vars_1KidPerFamily)[names(baseline_vars_1KidPerFamily) == 'prnt.empl.b'] <- 'prnt.empl.bl'
#add to initial df
HEI_Aim2_Long_1KidPerFamily_2 <- merge(HEI_Aim2_Long_1KidPerFamily, baseline_vars, by="subjectid")
## Cleaning
#merge Asian into Other group b/c statistically Asian group is too small
tapply(HEI_Aim2_Long_1KidPerFamily_2$race_ethnicity.bl,
HEI_Aim2_Long_1KidPerFamily_2$eventname,table, useNA = "always")
## $`1-year`
##
## Asian Black Hispanic Other White <NA>
## 217 1334 1932 964 4803 1
##
## $`2-year`
##
## Asian Black Hispanic Other White <NA>
## 141 715 1232 597 3326 0
##
## $Baseline
##
## Asian Black Hispanic Other White <NA>
## 228 1496 2101 1029 4963 1
HEI_Aim2_Long_1KidPerFamily_2$race_ethnicity.bl <-
ifelse(HEI_Aim2_Long_1KidPerFamily_2$race_ethnicity.bl=="Asian","Other",
HEI_Aim2_Long_1KidPerFamily_2$race_ethnicity.bl)
tapply(HEI_Aim2_Long_1KidPerFamily_2$race_ethnicity.bl,
HEI_Aim2_Long_1KidPerFamily_2$eventname,table, useNA = "always")
## $`1-year`
##
## Black Hispanic Other White <NA>
## 1334 1932 1181 4803 1
##
## $`2-year`
##
## Black Hispanic Other White <NA>
## 715 1232 738 3326 0
##
## $Baseline
##
## Black Hispanic Other White <NA>
## 1496 2101 1257 4963 1
#reformat variables
HEI_Aim2_Long_1KidPerFamily_2$eventname <-
as.factor(HEI_Aim2_Long_1KidPerFamily_2$eventname)
HEI_Aim2_Long_1KidPerFamily_2$eventname <-
relevel(HEI_Aim2_Long_1KidPerFamily_2$eventname , ref="Baseline")
table(HEI_Aim2_Long_1KidPerFamily_2$eventname, useNA = "always")
##
## Baseline 1-year 2-year <NA>
## 9818 9251 6011 0
HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_internal_r <- as.numeric(HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_internal_r)
HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_external_r <- as.numeric(HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_external_r)
HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_anxdep_r <- as.numeric(HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_anxdep_r)
HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_withdep_r <- as.numeric(HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_withdep_r)
HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_attention_r <- as.numeric(HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_attention_r)
HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_rulebreak_r <- as.numeric(HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_rulebreak_r)
HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_aggressive_r <- as.numeric(HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_aggressive_r)
HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_totprob_r <- as.numeric(HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_totprob_r)
HEI_Aim2_Long_1KidPerFamily_2$abcd_site <- as.factor(HEI_Aim2_Long_1KidPerFamily_2$abcd_site)
HEI_Aim2_Long_1KidPerFamily_2$subjectid <- as.factor(HEI_Aim2_Long_1KidPerFamily_2$subjectid)
HEI_Aim2_Long_1KidPerFamily_2$prnt.empl.bl <- factor(HEI_Aim2_Long_1KidPerFamily_2$prnt.empl.bl, levels = c("Employed", "Stay at Home Parent", "Unemployed", "Other"))
HEI_Aim2_Long_1KidPerFamily_2$overall.income.bl <- factor(HEI_Aim2_Long_1KidPerFamily_2$overall.income.bl, levels = c("[>=100K]", "[>=50K & <100K]", "[<50k]", "[Don't Know or Refuse]"))
HEI_Aim2_Long_1KidPerFamily_2$sex.bl <- factor(HEI_Aim2_Long_1KidPerFamily_2$sex.bl, levels = c("Male", "Female"))
HEI_Aim2_Long_1KidPerFamily_2$race_ethnicity.bl <- factor(HEI_Aim2_Long_1KidPerFamily_2$race_ethnicity.bl, levels = c("White", "Hispanic", "Black", "Other"))
HEI_Aim2_Long_1KidPerFamily_2$high.educ.bl <- factor(HEI_Aim2_Long_1KidPerFamily_2$high.educ.bl, levels = c("Post Graduate Degree", "Bachelor", "Some College", "HS Diploma/GED", "< HS Diploma"))
#create smaller df
df <- subset(HEI_Aim2_Long_1KidPerFamily_2,select=c("subjectid","abcd_site","eventname","interview_age","reshist_addr1_pm252016aa_bl","reshist_addr1_no2_2016_aavg_bl","prnt.empl.bl","overall.income.bl","sex.bl","race_ethnicity.bl","high.educ.bl","neighb_phenx_avg_p.bl","cbcl_scr_syn_internal_r","cbcl_scr_syn_external_r","cbcl_scr_syn_anxdep_r","cbcl_scr_syn_withdep_r","cbcl_scr_syn_attention_r","cbcl_scr_syn_rulebreak_r","cbcl_scr_syn_aggressive_r","cbcl_scr_syn_totprob_r"))
#complete cases because needed for zinb
df_cc <- df[complete.cases(df),]
#percentage of subjects lost with complete.cases
lost_sub <- data.frame(table(df$eventname))
colnames(lost_sub) <- c("eventname","abcd")
interim <- data.frame(table(df_cc$eventname))[2]
lost_sub$sample <- interim$Freq
lost_sub$diff <- lost_sub$abcd - lost_sub$sample
lost_sub$percent <- lost_sub$diff/lost_sub$abcd
#center age at 9years-old (i.e., 108 months)
df_cc$interview_age.c9 <- df_cc$interview_age-108
#change to years
df_cc$interview_age.c9.y <- df_cc$interview_age.c9/12
#center pm2.5 to 5 (recommended by WHO)
df_cc$reshist_addr1_pm252016aa_bl.c5 <- df_cc$reshist_addr1_pm252016aa_bl-5
#center no2 to 5.33 (recommended by WHO)
df_cc$reshist_addr1_no2_2016_aavg_bl.c533 <- df_cc$reshist_addr1_no2_2016_aavg_bl-5.33
#center around mean
neighb_phenx_avg_p.bl.cm <- df_cc$neighb_phenx_avg_p.bl - mean(df_cc$neighb_phenx_avg_p.bl)
#create table
des_table <- tableby(eventname ~ ., data = df_cc[ , -which(names(df_cc) %in% c("subjectid"))], total=F)
summary(des_table, title = "Descriptive Statistics by Eventname After Cleaning")
| Baseline (N=9271) | 1-year (N=8759) | 2-year (N=5827) | p value | |
|---|---|---|---|---|
| abcd_site | < 0.001 | |||
| site01 | 323 (3.5%) | 299 (3.4%) | 184 (3.2%) | |
| site02 | 305 (3.3%) | 298 (3.4%) | 210 (3.6%) | |
| site03 | 556 (6.0%) | 493 (5.6%) | 330 (5.7%) | |
| site04 | 631 (6.8%) | 615 (7.0%) | 451 (7.7%) | |
| site05 | 322 (3.5%) | 305 (3.5%) | 203 (3.5%) | |
| site06 | 509 (5.5%) | 495 (5.7%) | 334 (5.7%) | |
| site07 | 290 (3.1%) | 275 (3.1%) | 102 (1.8%) | |
| site08 | 302 (3.3%) | 289 (3.3%) | 183 (3.1%) | |
| site09 | 404 (4.4%) | 366 (4.2%) | 214 (3.7%) | |
| site10 | 623 (6.7%) | 592 (6.8%) | 427 (7.3%) | |
| site11 | 382 (4.1%) | 339 (3.9%) | 166 (2.8%) | |
| site12 | 481 (5.2%) | 438 (5.0%) | 236 (4.1%) | |
| site13 | 617 (6.7%) | 585 (6.7%) | 387 (6.6%) | |
| site14 | 359 (3.9%) | 344 (3.9%) | 255 (4.4%) | |
| site15 | 398 (4.3%) | 371 (4.2%) | 232 (4.0%) | |
| site16 | 794 (8.6%) | 769 (8.8%) | 500 (8.6%) | |
| site17 | 509 (5.5%) | 495 (5.7%) | 336 (5.8%) | |
| site18 | 351 (3.8%) | 344 (3.9%) | 206 (3.5%) | |
| site19 | 216 (2.3%) | 208 (2.4%) | 185 (3.2%) | |
| site20 | 447 (4.8%) | 433 (4.9%) | 327 (5.6%) | |
| site21 | 452 (4.9%) | 406 (4.6%) | 359 (6.2%) | |
| interview_age | < 0.001 | |||
| Mean (SD) | 118.856 (7.411) | 130.924 (7.617) | 143.081 (7.635) | |
| Range | 107.000 - 133.000 | 117.000 - 149.000 | 127.000 - 164.000 | |
| reshist_addr1_pm252016aa_bl | 0.771 | |||
| Mean (SD) | 7.706 (1.571) | 7.693 (1.571) | 7.690 (1.552) | |
| Range | 1.722 - 15.902 | 1.722 - 15.902 | 1.722 - 15.902 | |
| reshist_addr1_no2_2016_aavg_bl | 0.972 | |||
| Mean (SD) | 18.595 (5.571) | 18.579 (5.567) | 18.575 (5.573) | |
| Range | 0.729 - 37.940 | 0.729 - 37.940 | 0.729 - 35.346 | |
| prnt.empl.bl | 0.373 | |||
| Employed | 6444 (69.5%) | 6146 (70.2%) | 4139 (71.0%) | |
| Stay at Home Parent | 1612 (17.4%) | 1516 (17.3%) | 1002 (17.2%) | |
| Unemployed | 539 (5.8%) | 481 (5.5%) | 299 (5.1%) | |
| Other | 676 (7.3%) | 616 (7.0%) | 387 (6.6%) | |
| overall.income.bl | 0.018 | |||
| [>=100K] | 3514 (37.9%) | 3418 (39.0%) | 2307 (39.6%) | |
| [>=50K & <100K] | 2419 (26.1%) | 2312 (26.4%) | 1597 (27.4%) | |
| [<50k] | 2564 (27.7%) | 2335 (26.7%) | 1494 (25.6%) | |
| [Don’t Know or Refuse] | 774 (8.3%) | 694 (7.9%) | 429 (7.4%) | |
| sex.bl | 0.861 | |||
| Male | 4858 (52.4%) | 4605 (52.6%) | 3080 (52.9%) | |
| Female | 4413 (47.6%) | 4154 (47.4%) | 2747 (47.1%) | |
| race_ethnicity.bl | < 0.001 | |||
| White | 4759 (51.3%) | 4612 (52.7%) | 3236 (55.5%) | |
| Hispanic | 1958 (21.1%) | 1800 (20.6%) | 1198 (20.6%) | |
| Black | 1363 (14.7%) | 1221 (13.9%) | 678 (11.6%) | |
| Other | 1191 (12.8%) | 1126 (12.9%) | 715 (12.3%) | |
| high.educ.bl | 0.002 | |||
| Post Graduate Degree | 3179 (34.3%) | 3090 (35.3%) | 2093 (35.9%) | |
| Bachelor | 2310 (24.9%) | 2217 (25.3%) | 1551 (26.6%) | |
| Some College | 2423 (26.1%) | 2247 (25.7%) | 1466 (25.2%) | |
| HS Diploma/GED | 899 (9.7%) | 800 (9.1%) | 460 (7.9%) | |
| < HS Diploma | 460 (5.0%) | 405 (4.6%) | 257 (4.4%) | |
| neighb_phenx_avg_p.bl | 0.034 | |||
| Mean (SD) | 3.873 (0.976) | 3.884 (0.971) | 3.915 (0.948) | |
| Range | 1.000 - 5.000 | 1.000 - 5.000 | 1.000 - 5.000 | |
| cbcl_scr_syn_internal_r | 0.042 | |||
| Mean (SD) | 5.154 (5.539) | 5.281 (5.612) | 5.047 (5.610) | |
| Range | 0.000 - 51.000 | 0.000 - 48.000 | 0.000 - 50.000 | |
| cbcl_scr_syn_external_r | < 0.001 | |||
| Mean (SD) | 4.484 (5.798) | 4.232 (5.622) | 3.949 (5.368) | |
| Range | 0.000 - 49.000 | 0.000 - 46.000 | 0.000 - 46.000 | |
| cbcl_scr_syn_anxdep_r | < 0.001 | |||
| Mean (SD) | 2.569 (3.060) | 2.613 (3.091) | 2.352 (2.941) | |
| Range | 0.000 - 26.000 | 0.000 - 22.000 | 0.000 - 22.000 | |
| cbcl_scr_syn_withdep_r | < 0.001 | |||
| Mean (SD) | 1.045 (1.700) | 1.151 (1.800) | 1.235 (1.912) | |
| Range | 0.000 - 14.000 | 0.000 - 14.000 | 0.000 - 16.000 | |
| cbcl_scr_syn_attention_r | < 0.001 | |||
| Mean (SD) | 3.042 (3.508) | 2.941 (3.458) | 2.782 (3.324) | |
| Range | 0.000 - 19.000 | 0.000 - 19.000 | 0.000 - 19.000 | |
| cbcl_scr_syn_rulebreak_r | < 0.001 | |||
| Mean (SD) | 1.204 (1.844) | 1.142 (1.823) | 1.068 (1.829) | |
| Range | 0.000 - 20.000 | 0.000 - 20.000 | 0.000 - 23.000 | |
| cbcl_scr_syn_aggressive_r | < 0.001 | |||
| Mean (SD) | 3.280 (4.305) | 3.090 (4.154) | 2.881 (3.894) | |
| Range | 0.000 - 36.000 | 0.000 - 33.000 | 0.000 - 32.000 | |
| cbcl_scr_syn_totprob_r | < 0.001 | |||
| Mean (SD) | 18.519 (17.931) | 17.986 (17.659) | 16.755 (16.864) | |
| Range | 0.000 - 139.000 | 0.000 - 128.000 | 0.000 - 161.000 | |
| interview_age.c9 | < 0.001 | |||
| Mean (SD) | 10.856 (7.411) | 22.924 (7.617) | 35.081 (7.635) | |
| Range | -1.000 - 25.000 | 9.000 - 41.000 | 19.000 - 56.000 | |
| interview_age.c9.y | < 0.001 | |||
| Mean (SD) | 0.905 (0.618) | 1.910 (0.635) | 2.923 (0.636) | |
| Range | -0.083 - 2.083 | 0.750 - 3.417 | 1.583 - 4.667 | |
| reshist_addr1_pm252016aa_bl.c5 | 0.771 | |||
| Mean (SD) | 2.706 (1.571) | 2.693 (1.571) | 2.690 (1.552) | |
| Range | -3.278 - 10.902 | -3.278 - 10.902 | -3.278 - 10.902 | |
| reshist_addr1_no2_2016_aavg_bl.c533 | 0.972 | |||
| Mean (SD) | 13.265 (5.571) | 13.249 (5.567) | 13.245 (5.573) | |
| Range | -4.601 - 32.610 | -4.601 - 32.610 | -4.601 - 30.016 |
Final_DF_Descriptives <- summary(des_table, title = "Descriptive Statistics by Eventname After Cleaning")
#write.csv(Final_DF_Descriptives, "Descriptives_Final_Dataset.csv")
# Using the following two CBCL data as full and final analytic sample
Full_Data <-
df_prior[,c("subjectid","abcd_site","eventname","interview_age","prnt.empl.bl","overall.income.bl","sex.bl","race_ethnicity.bl","high.educ.bl","neighb_phenx_avg_p.bl")]
Analysis_Data <-
df_cc[,c("subjectid","abcd_site","eventname","interview_age","prnt.empl.bl","overall.income.bl","sex.bl","race_ethnicity.bl","high.educ.bl","neighb_phenx_avg_p.bl")]
str(Full_Data)
‘data.frame’: 30373 obs. of 10 variables: $ subjectid : chr “NDAR_INV003RTV85” “NDAR_INV003RTV85” “NDAR_INV005V6D2C” “NDAR_INV005V6D2C” … $ abcd_site : chr “site06” “site06” “site10” “site10” … $ eventname : Factor w/ 3 levels “Baseline”,“1-year”,..: 2 1 2 1 2 1 2 3 1 2 … $ interview_age : int 143 131 130 121 123 112 142 152 130 138 … $ prnt.empl.bl : chr “Employed” “Employed” “Stay at Home Parent” “Stay at Home Parent” … $ overall.income.bl : chr “[>=50K & <100K]” “[>=50K & <100K]” “[Don’t Know or Refuse]” “[Don’t Know or Refuse]” … $ sex.bl : chr “Female” “Female” “Female” “Female” … $ race_ethnicity.bl : chr “White” “White” “Hispanic” “Hispanic” … $ high.educ.bl : chr “HS Diploma/GED” “HS Diploma/GED” “< HS Diploma” “< HS Diploma” … $ neighb_phenx_avg_p.bl: num 5 5 4.33 4.33 5 …
str(Analysis_Data)
‘data.frame’: 23857 obs. of 10 variables: $ subjectid : Factor w/ 9818 levels “NDAR_INV003RTV85”,..: 2 2 3 3 4 4 4 5 5 5 … $ abcd_site : Factor w/ 21 levels “site01”,“site02”,..: 10 10 7 7 20 20 20 12 12 12 … $ eventname : Factor w/ 3 levels “Baseline”,“1-year”,..: 2 1 2 1 2 3 1 2 3 1 … $ interview_age : int 130 121 123 112 142 152 130 138 149 124 … $ prnt.empl.bl : Factor w/ 4 levels “Employed”,“Stay at Home Parent”,..: 2 2 1 1 1 1 1 1 1 1 … $ overall.income.bl : Factor w/ 4 levels “[>=100K]”,“[>=50K & <100K]”,..: 4 4 1 1 3 3 3 4 4 4 … $ sex.bl : Factor w/ 2 levels “Male”,“Female”: 2 2 1 1 1 1 1 1 1 1 … $ race_ethnicity.bl : Factor w/ 4 levels “White”,“Hispanic”,..: 2 2 1 1 1 1 1 3 3 3 … $ high.educ.bl : Factor w/ 5 levels “Post Graduate Degree”,..: 5 5 1 1 3 3 3 4 4 4 … $ neighb_phenx_avg_p.bl: num 4.33 4.33 5 5 3.67 …
Full_Data$label = "Full_data"
Analysis_Data$label = "Analysis_data"
# Merging the two data set to be used in arsenal package
CBCL_Combined_Data <- rbind(Full_Data, Analysis_Data)
names(CBCL_Combined_Data)
[1] “subjectid” “abcd_site” “eventname”
[4] “interview_age” “prnt.empl.bl” “overall.income.bl”
[7] “sex.bl” “race_ethnicity.bl” “high.educ.bl”
[10] “neighb_phenx_avg_p.bl” “label”
dim(CBCL_Combined_Data)
[1] 54230 11
dim(Full_Data)[1] + dim(Analysis_Data)[1]
[1] 54230
# Baseline comparison--------------------------------------------------------
Combined_Data_rbl_Baseline <-
tableby(label ~ sex.bl + interview_age +
race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl +
overall.income.bl,
subset= (eventname=="Baseline"),
data = CBCL_Combined_Data, total=F)
Combined_Data_Compare_Baseline <- summary(Combined_Data_rbl_Baseline)
Combined_Data_Compare_Baseline
| Analysis_data (N=9271) | Full_data (N=11839) | p value | |
|---|---|---|---|
| sex.bl | 0.783 | ||
| Female | 4413 (47.6%) | 5658 (47.8%) | |
| Male | 4858 (52.4%) | 6181 (52.2%) | |
| interview_age | 0.283 | ||
| Mean (SD) | 118.856 (7.411) | 118.967 (7.495) | |
| Range | 107.000 - 133.000 | 107.000 - 133.000 | |
| race_ethnicity.bl | < 0.001 | ||
| N-Miss | 0 | 2 | |
| Asian | 0 (0.0%) | 250 (2.1%) | |
| Black | 1363 (14.7%) | 1777 (15.0%) | |
| Hispanic | 1958 (21.1%) | 2405 (20.3%) | |
| Other | 1191 (12.8%) | 1243 (10.5%) | |
| White | 4759 (51.3%) | 6162 (52.1%) | |
| high.educ.bl | 0.938 | ||
| N-Miss | 0 | 14 | |
| < HS Diploma | 460 (5.0%) | 592 (5.0%) | |
| Bachelor | 2310 (24.9%) | 3006 (25.4%) | |
| HS Diploma/GED | 899 (9.7%) | 1129 (9.5%) | |
| Post Graduate Degree | 3179 (34.3%) | 4025 (34.0%) | |
| Some College | 2423 (26.1%) | 3073 (26.0%) | |
| prnt.empl.bl | 0.972 | ||
| N-Miss | 0 | 56 | |
| Employed | 6444 (69.5%) | 8194 (69.5%) | |
| Other | 676 (7.3%) | 855 (7.3%) | |
| Stay at Home Parent | 1612 (17.4%) | 2065 (17.5%) | |
| Unemployed | 539 (5.8%) | 669 (5.7%) | |
| neighb_phenx_avg_p.bl | 0.210 | ||
| N-Miss | 0 | 8 | |
| Mean (SD) | 3.873 (0.976) | 3.890 (0.975) | |
| Range | 1.000 - 5.000 | 1.000 - 5.000 | |
| overall.income.bl | 0.769 | ||
| N-Miss | 0 | 2 | |
| [<50k] | 2564 (27.7%) | 3215 (27.2%) | |
| [>=100K] | 3514 (37.9%) | 4544 (38.4%) | |
| [>=50K & <100K] | 2419 (26.1%) | 3065 (25.9%) | |
| [Don’t Know or Refuse] | 774 (8.3%) | 1013 (8.6%) |
#write.csv(Combined_Data_Compare_Baseline, "Combined_Data_Compare_Baseline.csv")
## Sanity check to see above p-value matches regular chi-squared in R and it does!
chisq.test(CBCL_Combined_Data[which(CBCL_Combined_Data$eventname=="Baseline"),]$label,
CBCL_Combined_Data[which(CBCL_Combined_Data$eventname=="Baseline"),]$sex.bl,
correct = FALSE)
Pearson's Chi-squared test
data: CBCL_Combined_Data[which(CBCL_Combined_Data\(eventname == "Baseline"), ]\)label and CBCL_Combined_Data[which(CBCL_Combined_Data\(eventname == "Baseline"), ]\)sex.bl X-squared = 0.076155, df = 1, p-value = 0.7826
chisq.test(CBCL_Combined_Data[which(CBCL_Combined_Data$eventname=="Baseline"),]$label,
CBCL_Combined_Data[which(CBCL_Combined_Data$eventname=="Baseline"),]$race_ethnicity.bl,
correct = FALSE)
Pearson's Chi-squared test
data: CBCL_Combined_Data[which(CBCL_Combined_Data\(eventname == "Baseline"), ]\)label and CBCL_Combined_Data[which(CBCL_Combined_Data\(eventname == "Baseline"), ]\)race_ethnicity.bl X-squared = 223.09, df = 4, p-value < 2.2e-16
chisq.test(CBCL_Combined_Data[which(CBCL_Combined_Data$eventname=="Baseline"),]$label,
CBCL_Combined_Data[which(CBCL_Combined_Data$eventname=="Baseline"),]$high.educ.bl,
correct = FALSE)
Pearson's Chi-squared test
data: CBCL_Combined_Data[which(CBCL_Combined_Data\(eventname == "Baseline"), ]\)label and CBCL_Combined_Data[which(CBCL_Combined_Data\(eventname == "Baseline"), ]\)high.educ.bl X-squared = 0.80586, df = 4, p-value = 0.9377
summary(aov(interview_age ~ label,
data=CBCL_Combined_Data[which(CBCL_Combined_Data$eventname=="Baseline"),]))
Df Sum Sq Mean Sq F value Pr(>F)
label 1 64 64.07 1.152 0.283 Residuals 21108 1174261 55.63
#---------------------------------------------------------------------------------------
# One Year Comparison
Combined_Data_rbl_OneYear <-
tableby(label ~ sex.bl + interview_age +
race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl +
overall.income.bl,
subset= (eventname=="1-year"),
data = CBCL_Combined_Data, total=F)
Combined_Data_Compare_OneYear <- summary(Combined_Data_rbl_OneYear)
Combined_Data_Compare_OneYear
| Analysis_data (N=8759) | Full_data (N=11200) | p value | |
|---|---|---|---|
| sex.bl | 0.770 | ||
| Female | 4154 (47.4%) | 5335 (47.6%) | |
| Male | 4605 (52.6%) | 5865 (52.4%) | |
| interview_age | 0.173 | ||
| Mean (SD) | 130.924 (7.617) | 131.073 (7.714) | |
| Range | 117.000 - 149.000 | 116.000 - 149.000 | |
| race_ethnicity.bl | < 0.001 | ||
| N-Miss | 0 | 2 | |
| Asian | 0 (0.0%) | 239 (2.1%) | |
| Black | 1221 (13.9%) | 1594 (14.2%) | |
| Hispanic | 1800 (20.6%) | 2220 (19.8%) | |
| Other | 1126 (12.9%) | 1171 (10.5%) | |
| White | 4612 (52.7%) | 5974 (53.3%) | |
| high.educ.bl | 0.934 | ||
| N-Miss | 0 | 12 | |
| < HS Diploma | 405 (4.6%) | 526 (4.7%) | |
| Bachelor | 2217 (25.3%) | 2889 (25.8%) | |
| HS Diploma/GED | 800 (9.1%) | 1007 (9.0%) | |
| Post Graduate Degree | 3090 (35.3%) | 3919 (35.0%) | |
| Some College | 2247 (25.7%) | 2847 (25.4%) | |
| prnt.empl.bl | 0.965 | ||
| N-Miss | 0 | 47 | |
| Employed | 6146 (70.2%) | 7826 (70.2%) | |
| Other | 616 (7.0%) | 791 (7.1%) | |
| Stay at Home Parent | 1516 (17.3%) | 1941 (17.4%) | |
| Unemployed | 481 (5.5%) | 595 (5.3%) | |
| neighb_phenx_avg_p.bl | 0.185 | ||
| N-Miss | 0 | 5 | |
| Mean (SD) | 3.884 (0.971) | 3.903 (0.969) | |
| Range | 1.000 - 5.000 | 1.000 - 5.000 | |
| overall.income.bl | 0.784 | ||
| N-Miss | 0 | 1 | |
| [<50k] | 2335 (26.7%) | 2930 (26.2%) | |
| [>=100K] | 3418 (39.0%) | 4419 (39.5%) | |
| [>=50K & <100K] | 2312 (26.4%) | 2937 (26.2%) | |
| [Don’t Know or Refuse] | 694 (7.9%) | 913 (8.2%) |
#write.csv(Combined_Data_Compare_OneYear, "Combined_Data_Compare_OneYear.csv")
#---------------------------------------------------------------------------------------
# Two Year Comparison
Combined_Data_rbl_TwoYear <-
tableby(label ~ sex.bl + interview_age +
race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl +
overall.income.bl,
subset= (eventname=="2-year"),
data = CBCL_Combined_Data, total=F)
Combined_Data_Compare_TwoYear <- summary(Combined_Data_rbl_TwoYear)
Combined_Data_Compare_TwoYear
| Analysis_data (N=5827) | Full_data (N=7334) | p value | |
|---|---|---|---|
| sex.bl | 0.714 | ||
| Female | 2747 (47.1%) | 3481 (47.5%) | |
| Male | 3080 (52.9%) | 3853 (52.5%) | |
| interview_age | 0.038 | ||
| Mean (SD) | 143.081 (7.635) | 143.361 (7.747) | |
| Range | 127.000 - 164.000 | 127.000 - 164.000 | |
| race_ethnicity.bl | < 0.001 | ||
| Asian | 0 (0.0%) | 158 (2.2%) | |
| Black | 678 (11.6%) | 874 (11.9%) | |
| Hispanic | 1198 (20.6%) | 1411 (19.2%) | |
| Other | 715 (12.3%) | 724 (9.9%) | |
| White | 3236 (55.5%) | 4167 (56.8%) | |
| high.educ.bl | 0.881 | ||
| N-Miss | 0 | 10 | |
| < HS Diploma | 257 (4.4%) | 306 (4.2%) | |
| Bachelor | 1551 (26.6%) | 2002 (27.3%) | |
| HS Diploma/GED | 460 (7.9%) | 568 (7.8%) | |
| Post Graduate Degree | 2093 (35.9%) | 2611 (35.6%) | |
| Some College | 1466 (25.2%) | 1837 (25.1%) | |
| prnt.empl.bl | 0.945 | ||
| N-Miss | 0 | 22 | |
| Employed | 4139 (71.0%) | 5214 (71.3%) | |
| Other | 387 (6.6%) | 474 (6.5%) | |
| Stay at Home Parent | 1002 (17.2%) | 1262 (17.3%) | |
| Unemployed | 299 (5.1%) | 362 (5.0%) | |
| neighb_phenx_avg_p.bl | 0.161 | ||
| N-Miss | 0 | 3 | |
| Mean (SD) | 3.915 (0.948) | 3.938 (0.942) | |
| Range | 1.000 - 5.000 | 1.000 - 5.000 | |
| overall.income.bl | 0.807 | ||
| [<50k] | 1494 (25.6%) | 1833 (25.0%) | |
| [>=100K] | 2307 (39.6%) | 2952 (40.3%) | |
| [>=50K & <100K] | 1597 (27.4%) | 2000 (27.3%) | |
| [Don’t Know or Refuse] | 429 (7.4%) | 549 (7.5%) |
#write.csv(Combined_Data_Compare_TwoYear, "Combined_Data_Compare_TwoYear.csv")
## Notes on how to check skewness
# If skewness is less than -1 or greater than 1, the distribution is highly skewed.
# If the kurtosis is greater than 3, then the data set has heavier tails than a normal
# distribution (more in the tails). If the kurtosis is less than 3, then the
# has lighter tails than a normal distribution (less in the tails).
skew(df_cc$cbcl_scr_syn_internal_r)
## [1] 1.906468
kurtosis(df_cc$cbcl_scr_syn_internal_r)
## [1] 4.982213
ggplot(df_cc,aes(cbcl_scr_syn_internal_r)) + geom_histogram(binwidth = 1) + facet_grid(.~eventname)
ggplot(df_cc,aes(cbcl_scr_syn_external_r)) + geom_histogram(binwidth = 1) + facet_grid(.~eventname)
ggplot(df_cc,aes(cbcl_scr_syn_anxdep_r)) + geom_histogram(binwidth = 1) + facet_grid(.~eventname)
ggplot(df_cc,aes(cbcl_scr_syn_withdep_r)) + geom_histogram(binwidth = 1) + facet_grid(.~eventname)
ggplot(df_cc,aes(cbcl_scr_syn_attention_r)) + geom_histogram(binwidth = 1) + facet_grid(.~eventname)
ggplot(df_cc,aes(cbcl_scr_syn_rulebreak_r)) + geom_histogram(binwidth = 1) + facet_grid(.~eventname)
ggplot(df_cc,aes(cbcl_scr_syn_aggressive_r)) + geom_histogram(binwidth = 1) + facet_grid(.~eventname)
ggplot(df_cc,aes(cbcl_scr_syn_totprob_r)) + geom_histogram(binwidth = 1) + facet_grid(.~eventname)
#check variance/mean ratio for overdispersion (greater than 1)
var(df_cc$cbcl_scr_syn_internal_r) %>% round(4)/mean(df_cc$cbcl_scr_syn_internal_r) %>% round(4)
## [1] 6.025142
var(df_cc$cbcl_scr_syn_external_r) %>% round(4)/mean(df_cc$cbcl_scr_syn_external_r) %>% round(4)
## [1] 7.451029
var(df_cc$cbcl_scr_syn_anxdep_r) %>% round(4)/mean(df_cc$cbcl_scr_syn_anxdep_r) %>% round(4)
## [1] 3.66044
var(df_cc$cbcl_scr_syn_withdep_r) %>% round(4)/mean(df_cc$cbcl_scr_syn_withdep_r) %>% round(4)
## [1] 2.841002
var(df_cc$cbcl_scr_syn_attention_r) %>% round(4)/mean(df_cc$cbcl_scr_syn_attention_r) %>% round(4)
## [1] 4.038821
var(df_cc$cbcl_scr_syn_rulebreak_r) %>% round(4)/mean(df_cc$cbcl_scr_syn_rulebreak_r) %>% round(4)
## [1] 2.927258
var(df_cc$cbcl_scr_syn_aggressive_r) %>% round(4)/mean(df_cc$cbcl_scr_syn_aggressive_r) %>% round(4)
## [1] 5.545168
var(df_cc$cbcl_scr_syn_totprob_r) %>% round(4)/mean(df_cc$cbcl_scr_syn_totprob_r) %>% round(4)
## [1] 17.28948
zeros <- df_cc %>%
group_by(eventname) %>%
dplyr::select(starts_with(c("eventname","cbcl"))) %>%
summarise_each(funs(sum(.==0)))
## Warning: `summarise_each_()` was deprecated in dplyr 0.7.0.
## ℹ Please use `across()` instead.
## ℹ The deprecated feature was likely used in the dplyr package.
## Please report the issue at <https://github.com/tidyverse/dplyr/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## ℹ Please use a list of either functions or lambdas:
##
## # Simple named list: list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`: tibble::lst(mean, median)
##
## # Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
all <- df_cc %>%
group_by(eventname) %>%
dplyr::select(starts_with(c("eventname","cbcl"))) %>%
summarise_each(funs(n()))
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## ℹ Please use a list of either functions or lambdas:
##
## # Simple named list: list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`: tibble::lst(mean, median)
##
## # Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
zero_percentage <- cbind(zeros[1],round(zeros[-1]/all[-1],4)*100)
zero_percentage
## eventname cbcl_scr_syn_internal_r cbcl_scr_syn_external_r
## 1 Baseline 16.32 26.50
## 2 1-year 15.31 27.57
## 3 2-year 17.62 29.71
## cbcl_scr_syn_anxdep_r cbcl_scr_syn_withdep_r cbcl_scr_syn_attention_r
## 1 29.77 55.18 31.91
## 2 29.46 52.25 32.53
## 3 33.24 51.74 34.00
## cbcl_scr_syn_rulebreak_r cbcl_scr_syn_aggressive_r cbcl_scr_syn_totprob_r
## 1 50.78 32.35 4.21
## 2 53.24 33.30 4.53
## 3 56.08 35.37 5.44
#select certain columns
subject_sum <- df_cc %>%
dplyr::select(starts_with(c("subject","cbcl")))
#add up all scores across subjec
subject_summed <- ddply(subject_sum, "subjectid", numcolwise(sum))
#look at data
describe(subject_summed[,2:9])
## vars n mean sd median trimmed mad min max
## cbcl_scr_syn_internal_r 1 9273 13.31 13.36 9 11.06 8.90 0 130
## cbcl_scr_syn_external_r 2 9273 10.96 13.51 6 8.32 7.41 0 112
## cbcl_scr_syn_anxdep_r 3 9273 6.51 7.20 4 5.25 4.45 0 64
## cbcl_scr_syn_withdep_r 4 9273 2.91 4.12 1 2.03 1.48 0 33
## cbcl_scr_syn_attention_r 5 9273 7.57 8.45 5 6.09 7.41 0 53
## cbcl_scr_syn_rulebreak_r 6 9273 2.95 4.24 1 2.06 1.48 0 39
## cbcl_scr_syn_aggressive_r 7 9273 8.01 9.94 5 6.06 5.93 0 76
## cbcl_scr_syn_totprob_r 8 9273 46.03 43.09 33 38.92 31.13 0 370
## range skew kurtosis se
## cbcl_scr_syn_internal_r 130 1.91 5.13 0.14
## cbcl_scr_syn_external_r 112 2.26 6.75 0.14
## cbcl_scr_syn_anxdep_r 64 1.93 5.06 0.07
## cbcl_scr_syn_withdep_r 33 2.31 6.65 0.04
## cbcl_scr_syn_attention_r 53 1.54 2.40 0.09
## cbcl_scr_syn_rulebreak_r 39 2.62 9.72 0.04
## cbcl_scr_syn_aggressive_r 76 2.15 5.90 0.10
## cbcl_scr_syn_totprob_r 370 1.81 4.31 0.45
#percentage of subjects who have zero across all timepoints
all_zeros <- as.data.frame(colSums(subject_summed == 0))
all_zeros$percentage_allzero <- all_zeros$`colSums(subject_summed == 0)`/nrow(subject_summed)
all_zeros$percentage_allzero <- round(all_zeros$percentage_allzero,4)*100
percentage_allzero <- all_zeros$percentage_allzero
#distribution
ggplot(subject_summed,aes(cbcl_scr_syn_internal_r)) + geom_histogram(binwidth = 1)
ggplot(subject_summed,aes(cbcl_scr_syn_external_r)) + geom_histogram(binwidth = 1)
ggplot(subject_summed,aes(cbcl_scr_syn_anxdep_r)) + geom_histogram(binwidth = 1)
ggplot(subject_summed,aes(cbcl_scr_syn_withdep_r)) + geom_histogram(binwidth = 1)
ggplot(subject_summed,aes(cbcl_scr_syn_attention_r)) + geom_histogram(binwidth = 1)
ggplot(subject_summed,aes(cbcl_scr_syn_rulebreak_r)) + geom_histogram(binwidth = 1)
ggplot(subject_summed,aes(cbcl_scr_syn_aggressive_r)) + geom_histogram(binwidth = 1)
ggplot(subject_summed,aes(cbcl_scr_syn_totprob_r)) + geom_histogram(binwidth = 1)
#Percentage of average people who have 0 at baseline and have 0 the whole time
zero_comparison <- rbind(zero_percentage, percentage_allzero)
## Warning in `[<-.factor`(`*tmp*`, ri, value = 0): invalid factor level, NA
## generated
zero_comparison$eventname <- as.character(zero_comparison$eventname)
zero_comparison[4,1] <- "All"
zero_comparison <- t(zero_comparison)
colnames(zero_comparison) <- c("Baseline","1-year","2-year","All")
zero_comparison <- as.data.frame(zero_comparison[2:9,])
zero_comparison <- lapply(zero_comparison,as.numeric)
zero_comparison$zeros_of_bl <- zero_comparison$All/zero_comparison$Baseline
zero_comparison$CBCL_Outcome <- c("cbcl_scr_syn_internal_r","cbcl_scr_syn_external_r","cbcl_scr_syn_anxdep_r","cbcl_scr_syn_withdep_r","cbcl_scr_syn_attention_r","cbcl_scr_syn_rulebreak_r","cbcl_scr_syn_aggressive_r","cbcl_scr_syn_totprob_r")
# Necessary libraries
# For lmer()
suppressPackageStartupMessages(library(lme4))
# For ICC
suppressPackageStartupMessages(library(performance))
# For repeatability estimate (rICC)
suppressPackageStartupMessages(library(rptR))
#internal
## all datapts
internal_icc_model <-lmer(cbcl_scr_syn_internal_r ~ 1 + (1 | subjectid),
data = df_cc,
REML = TRUE)
summary(internal_icc_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_internal_r ~ 1 + (1 | subjectid)
## Data: df_cc
##
## REML criterion at convergence: 139913.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.7331 -0.4231 -0.1370 0.3163 7.5438
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 21.30 4.615
## Residual 10.09 3.177
## Number of obs: 23857, groups: subjectid, 9273
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.19071 0.05239 99.08
icc(internal_icc_model)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.679
## Unadjusted ICC: 0.679
## zeros
internal_zero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_internal_r")) %>% filter(cbcl_scr_syn_internal_r==0)
internal_zero_ids <- unique(internal_zero_ids_df$subjectid)
internal_zero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_internal_r")) %>% filter(subjectid %in% internal_zero_ids)
internal_icc_model_zeros <-lmer(cbcl_scr_syn_internal_r ~ 1 + (1 | subjectid),
data = internal_zero,
REML = TRUE)
summary(internal_icc_model_zeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_internal_r ~ 1 + (1 | subjectid)
## Data: internal_zero
##
## REML criterion at convergence: 30686.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4925 -0.5231 -0.4661 0.3179 11.9425
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 0.3517 0.593
## Residual 4.6593 2.159
## Number of obs: 6902, groups: subjectid, 2613
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.23390 0.02854 43.23
icc(internal_icc_model_zeros)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.070
## Unadjusted ICC: 0.070
#external
## all datapts
external_icc_model <-lmer(cbcl_scr_syn_external_r ~ 1 + (1 | subjectid),
data = df_cc,
REML = TRUE)
summary(external_icc_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_external_r ~ 1 + (1 | subjectid)
## Data: df_cc
##
## REML criterion at convergence: 138094.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.5881 -0.3616 -0.1576 0.2581 8.4310
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 23.784 4.877
## Residual 8.522 2.919
## Number of obs: 23857, groups: subjectid, 9273
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.31205 0.05426 79.47
icc(external_icc_model)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.736
## Unadjusted ICC: 0.736
## zeros
external_zero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_external_r")) %>% filter(cbcl_scr_syn_external_r==0)
external_zero_ids <- unique(external_zero_ids_df$subjectid)
external_zero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_external_r")) %>% filter(subjectid %in% external_zero_ids)
external_icc_model_zeros <-lmer(cbcl_scr_syn_external_r ~ 1 + (1 | subjectid),
data = external_zero,
REML = TRUE)
summary(external_icc_model_zeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_external_r ~ 1 + (1 | subjectid)
## Data: external_zero
##
## REML criterion at convergence: 41969.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.3217 -0.4121 -0.3631 0.1030 16.5952
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 0.3897 0.6242
## Residual 3.2472 1.8020
## Number of obs: 10187, groups: subjectid, 3841
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.88988 0.02058 43.24
icc(external_icc_model_zeros)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.107
## Unadjusted ICC: 0.107
#anxdep
## all datapts
anxdep_icc_model <-lmer(cbcl_scr_syn_anxdep_r ~ 1 + (1 | subjectid),
data = df_cc,
REML = TRUE)
summary(anxdep_icc_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_anxdep_r ~ 1 + (1 | subjectid)
## Data: df_cc
##
## REML criterion at convergence: 111637.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.9202 -0.4008 -0.2045 0.3574 6.8767
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 6.162 2.482
## Residual 3.166 1.779
## Number of obs: 23857, groups: subjectid, 9273
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2.54257 0.02837 89.62
icc(anxdep_icc_model)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.661
## Unadjusted ICC: 0.661
## zeros
anxdep_zero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_anxdep_r")) %>% filter(cbcl_scr_syn_anxdep_r==0)
anxdep_zero_ids <- unique(anxdep_zero_ids_df$subjectid)
anxdep_zero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_anxdep_r")) %>% filter(subjectid %in% anxdep_zero_ids)
anxdep_icc_model_zeros <-lmer(cbcl_scr_syn_anxdep_r ~ 1 + (1 | subjectid),
data = anxdep_zero,
REML = TRUE)
summary(anxdep_icc_model_zeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_anxdep_r ~ 1 + (1 | subjectid)
## Data: anxdep_zero
##
## REML criterion at convergence: 39764.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.9205 -0.4821 -0.4268 0.2226 11.4628
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 0.1733 0.4163
## Residual 1.8089 1.3450
## Number of obs: 11309, groups: subjectid, 4295
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.7390 0.0142 52.03
icc(anxdep_icc_model_zeros)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.087
## Unadjusted ICC: 0.087
#withdep
## all datapts
withdep_icc_model <-lmer(cbcl_scr_syn_withdep_r ~ 1 + (1 | subjectid),
data = df_cc,
REML = TRUE)
summary(withdep_icc_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_withdep_r ~ 1 + (1 | subjectid)
## Data: df_cc
##
## REML criterion at convergence: 88075.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.6284 -0.3950 -0.1783 0.2933 9.4069
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 1.964 1.402
## Residual 1.267 1.125
## Number of obs: 23857, groups: subjectid, 9273
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.13401 0.01636 69.3
icc(withdep_icc_model)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.608
## Unadjusted ICC: 0.608
## zeros
withdep_zero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_withdep_r")) %>% filter(cbcl_scr_syn_withdep_r==0)
withdep_zero_ids <- unique(withdep_zero_ids_df$subjectid)
withdep_zero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_withdep_r")) %>% filter(subjectid %in% withdep_zero_ids)
withdep_icc_model_zeros <-lmer(cbcl_scr_syn_withdep_r ~ 1 + (1 | subjectid),
data = withdep_zero,
REML = TRUE)
summary(withdep_icc_model_zeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_withdep_r ~ 1 + (1 | subjectid)
## Data: withdep_zero
##
## REML criterion at convergence: 46227.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.0703 -0.4434 -0.3531 0.2156 15.7045
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 0.0872 0.2953
## Residual 0.8103 0.9001
## Number of obs: 16981, groups: subjectid, 6501
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.42042 0.00785 53.56
icc(withdep_icc_model_zeros)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.097
## Unadjusted ICC: 0.097
#attention
## all datapts
attention_icc_model <-lmer(cbcl_scr_syn_attention_r ~ 1 + (1 | subjectid),
data = df_cc,
REML = TRUE)
summary(attention_icc_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_attention_r ~ 1 + (1 | subjectid)
## Data: df_cc
##
## REML criterion at convergence: 113290.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.0804 -0.3522 -0.1670 0.3374 4.7753
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 9.090 3.015
## Residual 2.894 1.701
## Number of obs: 23857, groups: subjectid, 9273
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2.96173 0.03331 88.92
icc(attention_icc_model)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.759
## Unadjusted ICC: 0.759
## zeros
attention_zero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_attention_r")) %>% filter(cbcl_scr_syn_attention_r==0)
attention_zero_ids <- unique(attention_zero_ids_df$subjectid)
attention_zero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_attention_r")) %>% filter(subjectid %in% attention_zero_ids)
attention_icc_model_zeros <-lmer(cbcl_scr_syn_attention_r ~ 1 + (1 | subjectid),
data = attention_zero,
REML = TRUE)
summary(attention_icc_model_zeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_attention_r ~ 1 + (1 | subjectid)
## Data: attention_zero
##
## REML criterion at convergence: 38271.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6213 -0.4568 -0.3921 0.2748 10.4620
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 0.1694 0.4116
## Residual 1.5769 1.2558
## Number of obs: 11296, groups: subjectid, 4307
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.65109 0.01343 48.48
icc(attention_icc_model_zeros)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.097
## Unadjusted ICC: 0.097
#rulebreak
## all datapts
rulebreak_icc_model <-lmer(cbcl_scr_syn_rulebreak_r ~ 1 + (1 | subjectid),
data = df_cc,
REML = TRUE)
summary(rulebreak_icc_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_rulebreak_r ~ 1 + (1 | subjectid)
## Data: df_cc
##
## REML criterion at convergence: 87731.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.2835 -0.3999 -0.1589 0.2497 11.4049
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 2.253 1.501
## Residual 1.165 1.079
## Number of obs: 23857, groups: subjectid, 9273
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.16649 0.01716 67.96
icc(rulebreak_icc_model)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.659
## Unadjusted ICC: 0.659
## zeros
rulebreak_zero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_rulebreak_r")) %>% filter(cbcl_scr_syn_rulebreak_r==0)
rulebreak_zero_ids <- unique(rulebreak_zero_ids_df$subjectid)
rulebreak_zero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_rulebreak_r")) %>% filter(subjectid %in% rulebreak_zero_ids)
rulebreak_icc_model_zeros <-lmer(cbcl_scr_syn_rulebreak_r ~ 1 + (1 | subjectid),
data = rulebreak_zero,
REML = TRUE)
summary(rulebreak_icc_model_zeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_rulebreak_r ~ 1 + (1 | subjectid)
## Data: rulebreak_zero
##
## REML criterion at convergence: 44269.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.1406 -0.4507 -0.3568 -0.3568 16.6498
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 0.07883 0.2808
## Residual 0.73976 0.8601
## Number of obs: 16830, groups: subjectid, 6403
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.40502 0.00753 53.79
icc(rulebreak_icc_model_zeros)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.096
## Unadjusted ICC: 0.096
#aggressive
## all datapts
aggressive_icc_model <-lmer(cbcl_scr_syn_aggressive_r ~ 1 + (1 | subjectid),
data = df_cc,
REML = TRUE)
summary(aggressive_icc_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_aggressive_r ~ 1 + (1 | subjectid)
## Data: df_cc
##
## REML criterion at convergence: 124034.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.7104 -0.3608 -0.1606 0.2624 8.5561
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 12.715 3.566
## Residual 4.816 2.195
## Number of obs: 23857, groups: subjectid, 9273
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3.14385 0.03982 78.95
icc(aggressive_icc_model)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.725
## Unadjusted ICC: 0.725
## zeros
aggressive_zero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_aggressive_r")) %>% filter(cbcl_scr_syn_aggressive_r==0)
aggressive_zero_ids <- unique(aggressive_zero_ids_df$subjectid)
aggressive_zero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_aggressive_r")) %>% filter(subjectid %in% aggressive_zero_ids)
aggressive_icc_model_zeros <-lmer(cbcl_scr_syn_aggressive_r ~ 1 + (1 | subjectid),
data = aggressive_zero,
REML = TRUE)
summary(aggressive_icc_model_zeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_aggressive_r ~ 1 + (1 | subjectid)
## Data: aggressive_zero
##
## REML criterion at convergence: 43014.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.1781 -0.4252 -0.3648 0.2057 12.7275
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 0.248 0.498
## Residual 2.092 1.446
## Number of obs: 11691, groups: subjectid, 4434
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.71521 0.01539 46.48
icc(aggressive_icc_model_zeros)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.106
## Unadjusted ICC: 0.106
# Necessary libraries
# For lmer()
suppressPackageStartupMessages(library(lme4))
# For ICC
suppressPackageStartupMessages(library(performance))
# For repeatability estimate (rICC)
suppressPackageStartupMessages(library(rptR))
#internal
## nonzeros
internal_nonzero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_internal_r")) %>% filter(cbcl_scr_syn_internal_r>0)
internal_nonzero_ids <- unique(internal_nonzero_ids_df$subjectid)
internal_nonzero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_internal_r")) %>% filter(subjectid %in% internal_nonzero_ids)
internal_icc_model_nonzeros <-lmer(cbcl_scr_syn_internal_r ~ 1 + (1 | subjectid),
data = internal_nonzero,
REML = TRUE)
summary(internal_icc_model_nonzeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_internal_r ~ 1 + (1 | subjectid)
## Data: internal_nonzero
##
## REML criterion at convergence: 132669.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.5655 -0.4615 -0.1141 0.3359 7.4038
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 20.90 4.571
## Residual 10.65 3.264
## Number of obs: 22511, groups: subjectid, 8697
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.52520 0.05387 102.6
icc(internal_icc_model_nonzeros)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.662
## Unadjusted ICC: 0.662
#external
## nonzeros
external_nonzero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_external_r")) %>% filter(cbcl_scr_syn_external_r>0)
external_nonzero_ids <- unique(external_nonzero_ids_df$subjectid)
external_nonzero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_external_r")) %>% filter(subjectid %in% external_nonzero_ids)
external_icc_model_nonzeros <-lmer(cbcl_scr_syn_external_r ~ 1 + (1 | subjectid),
data = external_nonzero,
REML = TRUE)
summary(external_icc_model_nonzeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_external_r ~ 1 + (1 | subjectid)
## Data: external_nonzero
##
## REML criterion at convergence: 120925
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.1355 -0.4037 -0.1097 0.2963 7.8976
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 24.191 4.918
## Residual 9.862 3.140
## Number of obs: 20535, groups: subjectid, 7928
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.03510 0.05966 84.4
icc(external_icc_model_nonzeros)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.710
## Unadjusted ICC: 0.710
#anxdep
## nonzeros
anxdep_nonzero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_anxdep_r")) %>% filter(cbcl_scr_syn_anxdep_r>0)
anxdep_nonzero_ids <- unique(anxdep_nonzero_ids_df$subjectid)
anxdep_nonzero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_anxdep_r")) %>% filter(subjectid %in% anxdep_nonzero_ids)
anxdep_icc_model_nonzeros <-lmer(cbcl_scr_syn_anxdep_r ~ 1 + (1 | subjectid),
data = anxdep_nonzero,
REML = TRUE)
summary(anxdep_icc_model_nonzeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_anxdep_r ~ 1 + (1 | subjectid)
## Data: anxdep_nonzero
##
## REML criterion at convergence: 96834.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.4485 -0.5208 -0.0904 0.3756 6.4347
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 5.928 2.435
## Residual 3.699 1.923
## Number of obs: 20296, groups: subjectid, 7809
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3.00963 0.03083 97.63
icc(anxdep_icc_model_nonzeros)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.616
## Unadjusted ICC: 0.616
#withdep
## nonzeros
withdep_nonzero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_withdep_r")) %>% filter(cbcl_scr_syn_withdep_r>0)
withdep_nonzero_ids <- unique(withdep_nonzero_ids_df$subjectid)
withdep_nonzero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_withdep_r")) %>% filter(subjectid %in% withdep_nonzero_ids)
withdep_icc_model_nonzeros <-lmer(cbcl_scr_syn_withdep_r ~ 1 + (1 | subjectid),
data = withdep_nonzero,
REML = TRUE)
summary(withdep_icc_model_nonzeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_withdep_r ~ 1 + (1 | subjectid)
## Data: withdep_nonzero
##
## REML criterion at convergence: 61960.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.3003 -0.4962 -0.1340 0.3966 7.7721
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 1.974 1.405
## Residual 1.927 1.388
## Number of obs: 15539, groups: subjectid, 5945
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.75744 0.02148 81.83
icc(withdep_icc_model_nonzeros)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.506
## Unadjusted ICC: 0.506
#attention
## nonzeros
attention_nonzero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_attention_r")) %>% filter(cbcl_scr_syn_attention_r>0)
attention_nonzero_ids <- unique(attention_nonzero_ids_df$subjectid)
attention_nonzero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_attention_r")) %>% filter(subjectid %in% attention_nonzero_ids)
attention_icc_model_nonzeros <-lmer(cbcl_scr_syn_attention_r ~ 1 + (1 | subjectid),
data = attention_nonzero,
REML = TRUE)
summary(attention_icc_model_nonzeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_attention_r ~ 1 + (1 | subjectid)
## Data: attention_nonzero
##
## REML criterion at convergence: 94139
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6399 -0.5107 -0.0526 0.4245 4.3271
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 8.696 2.949
## Residual 3.545 1.883
## Number of obs: 19352, groups: subjectid, 7448
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3.67853 0.03689 99.72
icc(attention_icc_model_nonzeros)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.710
## Unadjusted ICC: 0.710
#rulebreak
## nonzeros
rulebreak_nonzero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_rulebreak_r")) %>% filter(cbcl_scr_syn_rulebreak_r>0)
rulebreak_nonzero_ids <- unique(rulebreak_nonzero_ids_df$subjectid)
rulebreak_nonzero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_rulebreak_r")) %>% filter(subjectid %in% rulebreak_nonzero_ids)
rulebreak_icc_model_nonzeros <-lmer(cbcl_scr_syn_rulebreak_r ~ 1 + (1 | subjectid),
data = rulebreak_nonzero,
REML = TRUE)
summary(rulebreak_icc_model_nonzeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_rulebreak_r ~ 1 + (1 | subjectid)
## Data: rulebreak_nonzero
##
## REML criterion at convergence: 62044.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.0710 -0.4704 -0.1196 0.3152 9.5994
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 2.352 1.534
## Residual 1.773 1.332
## Number of obs: 15592, groups: subjectid, 6005
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.7930 0.0226 79.33
icc(rulebreak_icc_model_nonzeros)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.570
## Unadjusted ICC: 0.570
#aggressive
## nonzeros
aggressive_nonzero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_aggressive_r")) %>% filter(cbcl_scr_syn_aggressive_r>0)
aggressive_nonzero_ids <- unique(aggressive_nonzero_ids_df$subjectid)
aggressive_nonzero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_aggressive_r")) %>% filter(subjectid %in% aggressive_nonzero_ids)
aggressive_icc_model_nonzeros <-lmer(cbcl_scr_syn_aggressive_r ~ 1 + (1 | subjectid),
data = aggressive_nonzero,
REML = TRUE)
summary(aggressive_icc_model_nonzeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_aggressive_r ~ 1 + (1 | subjectid)
## Data: aggressive_nonzero
##
## REML criterion at convergence: 103482
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.0814 -0.4505 -0.1032 0.3293 7.8256
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjectid (Intercept) 12.875 3.588
## Residual 5.895 2.428
## Number of obs: 19395, groups: subjectid, 7473
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3.8917 0.0452 86.09
icc(aggressive_icc_model_nonzeros)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.686
## Unadjusted ICC: 0.686
#pm2.5
mean(df_cc$reshist_addr1_pm252016aa_bl[df_cc$eventname=="Baseline"])
## [1] 7.706378
sd(df_cc$reshist_addr1_pm252016aa_bl[df_cc$eventname=="Baseline"])
## [1] 1.570865
range(df_cc$reshist_addr1_pm252016aa_bl[df_cc$eventname=="Baseline"])
## [1] 1.722393 15.901970
hist(df_cc$reshist_addr1_pm252016aa_bl[df_cc$eventname=="Baseline"])
##test difference with EPA level
t.test(df_cc$reshist_addr1_pm252016aa_bl[df_cc$eventname=="Baseline"], mu = 12, alternative = "two.sided")
##
## One Sample t-test
##
## data: df_cc$reshist_addr1_pm252016aa_bl[df_cc$eventname == "Baseline"]
## t = -263.18, df = 9270, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 12
## 95 percent confidence interval:
## 7.674398 7.738358
## sample estimates:
## mean of x
## 7.706378
#no2
mean(df_cc$reshist_addr1_no2_2016_aavg_bl[df_cc$eventname=="Baseline"])
## [1] 18.59514
sd(df_cc$reshist_addr1_no2_2016_aavg_bl[df_cc$eventname=="Baseline"])
## [1] 5.57146
range(df_cc$reshist_addr1_no2_2016_aavg_bl[df_cc$eventname=="Baseline"])
## [1] 0.7291464 37.9399900
hist(df_cc$reshist_addr1_no2_2016_aavg_bl[df_cc$eventname=="Baseline"])
##test difference with EPA level
t.test(df_cc$reshist_addr1_no2_2016_aavg_bl[df_cc$eventname=="Baseline"], mu = 53, alternative = "two.sided")
##
## One Sample t-test
##
## data: df_cc$reshist_addr1_no2_2016_aavg_bl[df_cc$eventname == "Baseline"]
## t = -594.59, df = 9270, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 53
## 95 percent confidence interval:
## 18.48171 18.70857
## sample estimates:
## mean of x
## 18.59514
internal_histogram <- ggplot(df_cc, aes(x=cbcl_scr_syn_internal_r)) + geom_histogram() + facet_grid(~eventname) + labs(x="Internalizing Behavior") +
theme(axis.title.x = element_text(face="bold"))
external_histogram <- ggplot(df_cc, aes(x=cbcl_scr_syn_external_r)) + geom_histogram() + facet_grid(~eventname) + labs(x="Externalizing Behavior") +
theme(axis.title.x = element_text(face="bold"))
anxdep_histogram <- ggplot(df_cc, aes(x=cbcl_scr_syn_anxdep_r)) + geom_histogram() + facet_grid(~eventname) + labs(x="Anxious/Depressed") +
theme(axis.title.x = element_text(face="bold"))
withdep_histogram <- ggplot(df_cc, aes(x=cbcl_scr_syn_withdep_r)) + geom_histogram() + facet_grid(~eventname) + labs(x="Withdrawn/Depressed") +
theme(axis.title.x = element_text(face="bold"))
attention_histogram <- ggplot(df_cc, aes(x=cbcl_scr_syn_attention_r)) + geom_histogram() + facet_grid(~eventname) + labs(x="Attention Problems") +
theme(axis.title.x = element_text(face="bold"))
rulebreak_histogram <- ggplot(df_cc, aes(x=cbcl_scr_syn_rulebreak_r)) + geom_histogram() + facet_grid(~eventname) + labs(x="Rule-breaking Behavior") +
theme(axis.title.x = element_text(face="bold"))
aggressive_histogram <- ggplot(df_cc, aes(x=cbcl_scr_syn_aggressive_r)) + geom_histogram() + facet_grid(~eventname) + labs(x="Aggressive Behavior") +
theme(axis.title.x = element_text(face="bold"))
#export above
##arrange plots
histograms <- grid.arrange(internal_histogram, anxdep_histogram, withdep_histogram, external_histogram, rulebreak_histogram, aggressive_histogram, attention_histogram, ncol=3)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
##save
#ggsave(histograms,device="png",filename="Figures/CBCL_Histograms_2.7.23.png",width=12,height=12, units = "in")
#internal
internal_linear <- df_cc %>% dplyr::select(c("subjectid","eventname","cbcl_scr_syn_internal_r","reshist_addr1_pm252016aa_bl","reshist_addr1_no2_2016_aavg_bl")) %>% filter(cbcl_scr_syn_internal_r>0)
internal_linear_pm <- ggplot(internal_linear,aes(x=cbcl_scr_syn_internal_r,y=reshist_addr1_pm252016aa_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="internal",y="pm2.5")
internal_linear_no <- ggplot(internal_linear,aes(x=cbcl_scr_syn_internal_r,y=reshist_addr1_no2_2016_aavg_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="internal",y="no2")
#anxdep
anxdep_linear <- df_cc %>% dplyr::select(c("subjectid","eventname","cbcl_scr_syn_anxdep_r","reshist_addr1_pm252016aa_bl","reshist_addr1_no2_2016_aavg_bl")) %>% filter(cbcl_scr_syn_anxdep_r>0)
anxdep_linear_pm <- ggplot(anxdep_linear,aes(x=cbcl_scr_syn_anxdep_r,y=reshist_addr1_pm252016aa_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="anxdep",y="pm2.5")
anxdep_linear_no <- ggplot(anxdep_linear,aes(x=cbcl_scr_syn_anxdep_r,y=reshist_addr1_no2_2016_aavg_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="anxdep",y="no2")
#withdep
withdep_linear <- df_cc %>% dplyr::select(c("subjectid","eventname","cbcl_scr_syn_withdep_r","reshist_addr1_pm252016aa_bl","reshist_addr1_no2_2016_aavg_bl")) %>% filter(cbcl_scr_syn_withdep_r>0)
withdep_linear_pm <- ggplot(withdep_linear,aes(x=cbcl_scr_syn_withdep_r,y=reshist_addr1_pm252016aa_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="withdep",y="pm2.5")
withdep_linear_no <- ggplot(withdep_linear,aes(x=cbcl_scr_syn_withdep_r,y=reshist_addr1_no2_2016_aavg_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="withdep",y="no2")
#external
external_linear <- df_cc %>% dplyr::select(c("subjectid","eventname","cbcl_scr_syn_external_r","reshist_addr1_pm252016aa_bl","reshist_addr1_no2_2016_aavg_bl")) %>% filter(cbcl_scr_syn_external_r>0)
external_linear_pm <- ggplot(external_linear,aes(x=cbcl_scr_syn_external_r,y=reshist_addr1_pm252016aa_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="external",y="pm2.5")
external_linear_no <- ggplot(external_linear,aes(x=cbcl_scr_syn_external_r,y=reshist_addr1_no2_2016_aavg_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="external",y="no2")
#rulebreak
rulebreak_linear <- df_cc %>% dplyr::select(c("subjectid","eventname","cbcl_scr_syn_rulebreak_r","reshist_addr1_pm252016aa_bl","reshist_addr1_no2_2016_aavg_bl")) %>% filter(cbcl_scr_syn_rulebreak_r>0)
rulebreak_linear_pm <- ggplot(rulebreak_linear,aes(x=cbcl_scr_syn_rulebreak_r,y=reshist_addr1_pm252016aa_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="rulebreak",y="pm2.5")
rulebreak_linear_no <- ggplot(rulebreak_linear,aes(x=cbcl_scr_syn_rulebreak_r,y=reshist_addr1_no2_2016_aavg_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="rulebreak",y="no2")
#aggressive
aggressive_linear <- df_cc %>% dplyr::select(c("subjectid","eventname","cbcl_scr_syn_aggressive_r","reshist_addr1_pm252016aa_bl","reshist_addr1_no2_2016_aavg_bl")) %>% filter(cbcl_scr_syn_aggressive_r>0)
aggressive_linear_pm <- ggplot(aggressive_linear,aes(x=cbcl_scr_syn_aggressive_r,y=reshist_addr1_pm252016aa_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="aggressive",y="pm2.5")
aggressive_linear_no <- ggplot(aggressive_linear,aes(x=cbcl_scr_syn_aggressive_r,y=reshist_addr1_no2_2016_aavg_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="aggressive",y="no2")
#attention
attention_linear <- df_cc %>% dplyr::select(c("subjectid","eventname","cbcl_scr_syn_attention_r","reshist_addr1_pm252016aa_bl","reshist_addr1_no2_2016_aavg_bl")) %>% filter(cbcl_scr_syn_attention_r>0)
attention_linear_pm <- ggplot(attention_linear,aes(x=cbcl_scr_syn_attention_r,y=reshist_addr1_pm252016aa_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="attention",y="pm2.5")
attention_linear_no <- ggplot(attention_linear,aes(x=cbcl_scr_syn_attention_r,y=reshist_addr1_no2_2016_aavg_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="attention",y="no2")
#all graphs
linearity_pm <- grid.arrange(internal_linear_pm, anxdep_linear_pm, withdep_linear_pm, external_linear_pm, rulebreak_linear_pm, aggressive_linear_pm, attention_linear_pm, ncol=1)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
linearity_no <- grid.arrange(internal_linear_no, anxdep_linear_no, withdep_linear_no, external_linear_no, rulebreak_linear_no, aggressive_linear_no, attention_linear_no, ncol=1)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
##save
#ggsave(linearity_pm,device="png",filename="Figures/CBCL.PM2p5_Linearity_6.3.23.png",width=12,height=12, units = "in")
#ggsave(linearity_no,device="png",filename="Figures/CBCL.NO2_Linearity_6.3.23.png",width=12,height=12, units = "in")
Zero-Inflated (ZI) Negative Binomial (NB): glmm.zinb in NBZIMM package.
CBCL only needs one nested random intercept since we eliminated the family nesting by choosing one kid per family.
For the negative binomial portion of the model, we do not nest by subject since the ICC across subjects is very low.
internal_zinb_r <- glmm.zinb(cbcl_scr_syn_internal_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, random = ~1|abcd_site/subjectid,
zi_fixed = ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, zi_random = ~1|abcd_site, data = df_cc)
## Computational iterations: 7
## Computational time: 0.906 minutes
summary(internal_zinb_r)
## Linear mixed-effects model fit by maximum likelihood
## Data: df_cc
## AIC BIC logLik
## NA NA NA
##
## Random effects:
## Formula: ~1 | abcd_site
## (Intercept)
## StdDev: 0.1021209
##
## Formula: ~1 | subjectid %in% abcd_site
## (Intercept) Residual
## StdDev: 0.8517082 1.107373
##
## Variance function:
## Structure: fixed weights
## Formula: ~invwt
## Fixed effects: cbcl_scr_syn_internal_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl
## Value Std.Error DF t-value
## (Intercept) 1.2627398 0.03311301 14511 38.13425
## interview_age.c9.y 0.0024228 0.00438766 14511 0.55219
## race_ethnicity.blHispanic -0.0327194 0.03110375 9309 -1.05194
## race_ethnicity.blBlack -0.3685892 0.03514211 9309 -10.48854
## race_ethnicity.blOther -0.0546427 0.03167776 9309 -1.72496
## high.educ.blBachelor 0.0205633 0.02651289 9309 0.77560
## high.educ.blSome College 0.0494810 0.03038591 9309 1.62842
## high.educ.blHS Diploma/GED -0.1309304 0.04314274 9309 -3.03482
## high.educ.bl< HS Diploma -0.1124291 0.05555319 9309 -2.02381
## prnt.empl.blStay at Home Parent 0.0311560 0.02695727 9309 1.15575
## prnt.empl.blUnemployed 0.1285039 0.04442650 9309 2.89251
## prnt.empl.blOther 0.1997699 0.03884139 9309 5.14322
## neighb_phenx_avg_p.bl.cm -0.1174866 0.01121878 9309 -10.47231
## overall.income.bl[>=50K & <100K] 0.1076249 0.02683549 9309 4.01054
## overall.income.bl[<50k] 0.1678532 0.03380331 9309 4.96558
## overall.income.bl[Don't Know or Refuse] 0.0690120 0.04256232 9309 1.62143
## sex.blFemale 0.0499342 0.01970427 9309 2.53418
## p-value
## (Intercept) 0.0000
## interview_age.c9.y 0.5808
## race_ethnicity.blHispanic 0.2929
## race_ethnicity.blBlack 0.0000
## race_ethnicity.blOther 0.0846
## high.educ.blBachelor 0.4380
## high.educ.blSome College 0.1035
## high.educ.blHS Diploma/GED 0.0024
## high.educ.bl< HS Diploma 0.0430
## prnt.empl.blStay at Home Parent 0.2478
## prnt.empl.blUnemployed 0.0038
## prnt.empl.blOther 0.0000
## neighb_phenx_avg_p.bl.cm 0.0000
## overall.income.bl[>=50K & <100K] 0.0001
## overall.income.bl[<50k] 0.0000
## overall.income.bl[Don't Know or Refuse] 0.1050
## sex.blFemale 0.0113
## Correlation:
## (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y -0.237
## race_ethnicity.blHispanic -0.156 0.007
## race_ethnicity.blBlack -0.136 0.011 0.344
## race_ethnicity.blOther -0.177 0.004 0.283 0.253
## high.educ.blBachelor -0.272 -0.003 -0.018 -0.012 -0.001
## high.educ.blSome College -0.186 0.003 -0.112 -0.084 -0.024
## high.educ.blHS Diploma/GED -0.116 0.009 -0.144 -0.143 -0.006
## high.educ.bl< HS Diploma -0.074 0.002 -0.171 -0.074 -0.010
## prnt.empl.blStay at Home Parent -0.137 0.010 0.043 0.092 0.017
## prnt.empl.blUnemployed -0.053 0.004 0.008 -0.041 0.009
## prnt.empl.blOther -0.060 0.004 0.040 0.009 -0.012
## neighb_phenx_avg_p.bl.cm -0.137 -0.005 0.044 0.153 0.047
## overall.income.bl[>=50K & <100K] -0.214 0.000 -0.094 -0.062 -0.015
## overall.income.bl[<50k] -0.134 0.000 -0.151 -0.186 -0.082
## overall.income.bl[Don't Know or Refuse] -0.118 -0.001 -0.101 -0.126 -0.062
## sex.blFemale -0.290 0.006 -0.008 -0.017 -0.018
## hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College 0.455
## high.educ.blHS Diploma/GED 0.330 0.493
## high.educ.bl< HS Diploma 0.262 0.406 0.372
## prnt.empl.blStay at Home Parent -0.028 -0.014 -0.051 -0.094
## prnt.empl.blUnemployed -0.008 -0.009 -0.068 -0.097 0.147
## prnt.empl.blOther -0.012 -0.033 -0.010 -0.019 0.158
## neighb_phenx_avg_p.bl.cm -0.006 0.061 0.057 0.055 0.028
## overall.income.bl[>=50K & <100K] -0.174 -0.277 -0.171 -0.114 -0.032
## overall.income.bl[<50k] -0.159 -0.418 -0.365 -0.308 -0.054
## overall.income.bl[Don't Know or Refuse] -0.100 -0.250 -0.237 -0.219 -0.077
## sex.blFemale 0.015 0.023 0.015 -0.004 -0.006
## prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther 0.131
## neighb_phenx_avg_p.bl.cm 0.022 0.004
## overall.income.bl[>=50K & <100K] -0.015 -0.050 0.085
## overall.income.bl[<50k] -0.101 -0.140 0.156 0.504
## overall.income.bl[Don't Know or Refuse] -0.078 -0.098 0.087 0.358 0.481
## sex.blFemale 0.020 0.020 0.027 -0.006 -0.007
## o..KoR
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther
## neighb_phenx_avg_p.bl.cm
## overall.income.bl[>=50K & <100K]
## overall.income.bl[<50k]
## overall.income.bl[Don't Know or Refuse]
## sex.blFemale 0.008
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.0404923 -0.7894731 -0.1971850 0.4346512 4.2592012
##
## Number of Observations: 23857
## Number of Groups:
## abcd_site subjectid %in% abcd_site
## 21 9345
summary(internal_zinb_r$zi.fit)
## Linear mixed-effects model fit by maximum likelihood
## Data: data
## AIC BIC logLik
## NA NA NA
##
## Random effects:
## Formula: ~1 | abcd_site
## (Intercept) Residual
## StdDev: 0.3953535 0.6086933
##
## Variance function:
## Structure: fixed weights
## Formula: ~invwt
## Fixed effects: zp ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl
## Value Std.Error DF t-value
## (Intercept) -4.095139 0.11408030 23820 -35.89698
## interview_age.c9.y 0.126396 0.02281688 23820 5.53956
## race_ethnicity.blHispanic 0.104012 0.07676474 23820 1.35494
## race_ethnicity.blBlack 0.907727 0.07370631 23820 12.31546
## race_ethnicity.blOther 0.395088 0.07552753 23820 5.23104
## high.educ.blBachelor 0.127581 0.06659833 23820 1.91568
## high.educ.blSome College 0.046868 0.07603602 23820 0.61639
## high.educ.blHS Diploma/GED 0.589169 0.09129453 23820 6.45350
## high.educ.bl< HS Diploma 0.812960 0.10625712 23820 7.65088
## prnt.empl.blStay at Home Parent -0.046392 0.06526792 23820 -0.71079
## prnt.empl.blUnemployed -0.114445 0.09341298 23820 -1.22515
## prnt.empl.blOther 0.027601 0.08717385 23820 0.31662
## neighb_phenx_avg_p.bl.cm 0.248386 0.02659340 23820 9.34015
## overall.income.bl[>=50K & <100K] -0.065052 0.06931711 23820 -0.93847
## overall.income.bl[<50k] 0.019233 0.08040320 23820 0.23921
## overall.income.bl[Don't Know or Refuse] 0.306092 0.08945439 23820 3.42177
## sex.blFemale -0.073846 0.04608496 23820 -1.60239
## p-value
## (Intercept) 0.0000
## interview_age.c9.y 0.0000
## race_ethnicity.blHispanic 0.1754
## race_ethnicity.blBlack 0.0000
## race_ethnicity.blOther 0.0000
## high.educ.blBachelor 0.0554
## high.educ.blSome College 0.5376
## high.educ.blHS Diploma/GED 0.0000
## high.educ.bl< HS Diploma 0.0000
## prnt.empl.blStay at Home Parent 0.4772
## prnt.empl.blUnemployed 0.2205
## prnt.empl.blOther 0.7515
## neighb_phenx_avg_p.bl.cm 0.0000
## overall.income.bl[>=50K & <100K] 0.3480
## overall.income.bl[<50k] 0.8109
## overall.income.bl[Don't Know or Refuse] 0.0006
## sex.blFemale 0.1091
## Correlation:
## (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y -0.392
## race_ethnicity.blHispanic -0.136 0.011
## race_ethnicity.blBlack -0.152 0.032 0.460
## race_ethnicity.blOther -0.169 0.004 0.352 0.340
## high.educ.blBachelor -0.215 -0.007 -0.012 -0.005 0.008
## high.educ.blSome College -0.147 0.010 -0.115 -0.103 -0.007
## high.educ.blHS Diploma/GED -0.116 0.027 -0.160 -0.162 0.002
## high.educ.bl< HS Diploma -0.081 0.011 -0.190 -0.106 -0.003
## prnt.empl.blStay at Home Parent -0.103 0.020 0.045 0.106 0.022
## prnt.empl.blUnemployed -0.041 0.010 0.017 -0.022 0.012
## prnt.empl.blOther -0.043 0.011 0.057 0.012 -0.009
## neighb_phenx_avg_p.bl.cm -0.120 -0.001 0.031 0.147 0.038
## overall.income.bl[>=50K & <100K] -0.150 -0.003 -0.110 -0.094 -0.027
## overall.income.bl[<50k] -0.104 -0.004 -0.157 -0.214 -0.082
## overall.income.bl[Don't Know or Refuse] -0.099 0.008 -0.121 -0.156 -0.070
## sex.blFemale -0.190 0.015 -0.002 -0.024 -0.017
## hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College 0.479
## high.educ.blHS Diploma/GED 0.410 0.587
## high.educ.bl< HS Diploma 0.359 0.526 0.543
## prnt.empl.blStay at Home Parent -0.024 -0.013 -0.057 -0.125
## prnt.empl.blUnemployed -0.018 -0.005 -0.081 -0.120 0.177
## prnt.empl.blOther -0.017 -0.032 -0.021 -0.037 0.167
## neighb_phenx_avg_p.bl.cm 0.002 0.050 0.052 0.060 0.031
## overall.income.bl[>=50K & <100K] -0.165 -0.281 -0.205 -0.155 -0.023
## overall.income.bl[<50k] -0.159 -0.408 -0.412 -0.390 -0.051
## overall.income.bl[Don't Know or Refuse] -0.119 -0.296 -0.309 -0.288 -0.095
## sex.blFemale 0.006 0.009 0.010 -0.013 0.012
## prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther 0.149
## neighb_phenx_avg_p.bl.cm 0.024 -0.008
## overall.income.bl[>=50K & <100K] -0.014 -0.049 0.065
## overall.income.bl[<50k] -0.081 -0.126 0.129 0.535
## overall.income.bl[Don't Know or Refuse] -0.082 -0.110 0.074 0.437 0.604
## sex.blFemale 0.035 0.015 0.020 -0.007 -0.003
## o..KoR
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther
## neighb_phenx_avg_p.bl.cm
## overall.income.bl[>=50K & <100K]
## overall.income.bl[<50k]
## overall.income.bl[Don't Know or Refuse]
## sex.blFemale -0.004
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -0.8137244 -0.3078959 -0.2384644 -0.1774828 19.5714128
##
## Number of Observations: 23857
## Number of Groups: 21
anova(internal_zinb_r)
## numDF denDF F-value p-value
## (Intercept) 1 14511 2950.3403 <.0001
## interview_age.c9.y 1 14511 0.2593 0.6106
## race_ethnicity.bl 3 9309 19.5365 <.0001
## high.educ.bl 4 9309 12.1290 <.0001
## prnt.empl.bl 3 9309 15.4056 <.0001
## neighb_phenx_avg_p.bl.cm 1 9309 131.4105 <.0001
## overall.income.bl 3 9309 9.7692 <.0001
## sex.bl 1 9309 6.4221 0.0113
VarCorr(internal_zinb_r)
## Variance StdDev
## abcd_site = pdLogChol(1)
## (Intercept) 0.01042867 0.1021209
## subjectid = pdLogChol(1)
## (Intercept) 0.72540687 0.8517082
## Residual 1.22627555 1.1073733
Zero inflated negative binomial (zinb) regression already has overdispersion and excess zeros and this is accounted for in the zinb modeling chosen, “The data distribution combines the negative binomial distribution and the logit distribution”
Details on zinb can be found here: link
For Model Checking we will follow the following pdf: link This info is further detailed/published in books by Cameron and Trivedi (2013) and Hilbe (2014) and in Garay, Hashimoto, Ortega, and Lachos (2011).
They suggest using Pearson residuals.
#Check outlier/residuals with this df
internal_res <- df_cc
internal_res$level1_resid.raw <- residuals(internal_zinb_r)
internal_res$level1_resid.pearson <- residuals(internal_zinb_r, type="pearson")
#Add predicted values (Yhat)
internal_res$cbcl_scr_syn_internal_r_predicted <- predict(internal_zinb_r,internal_res,type="response")
#Incidence
internal_res$incidence <- estimate.probability(internal_res$cbcl_scr_syn_internal_r, method="empirical")
#Plotting histogram of residuals, but may be skewed since using ZINB, so make sure to check below plots
hist(internal_res$level1_resid.pearson)
“These plots show each of the independent variables plotted against the incidence as measured by Y (CBCL Outcome). They should be scanned for outliers and curvilinear patterns.”
#age
ggplot(internal_res,aes(incidence,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
“This plot shows the residuals versus the dependent variable. It can be used to spot outliers.”
plot(internal_res$level1_resid.pearson, internal_res$cbcl_scr_syn_internal_r)
“This plot shows the residuals versus the predicted value (Yhat) of the dependent variable. It can show outliers.”
plot(internal_res$level1_resid.pearson, internal_res$cbcl_scr_syn_internal_r_predicted)
“This plot shows the residuals versus the row numbers. It is used to quickly spot rows that have large residuals.”
plot(as.numeric(rownames(internal_res)),internal_res$level1_resid.pearson)
“These plots show the residuals plotted against the independent variables. They are used to spot outliers. They are also used to find curvilinear patterns that are not represented in the regression model.”
#age
ggplot(internal_res,aes(level1_resid.pearson,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
For below models, view Internalizing above for notes.
external_zinb_r <- glmm.zinb(cbcl_scr_syn_external_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, random = ~1|abcd_site/subjectid,
zi_fixed = ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, zi_random = ~1|abcd_site, data = df_cc)
## Computational iterations: 9
## Computational time: 1.169 minutes
summary(external_zinb_r)
## Linear mixed-effects model fit by maximum likelihood
## Data: df_cc
## AIC BIC logLik
## NA NA NA
##
## Random effects:
## Formula: ~1 | abcd_site
## (Intercept)
## StdDev: 0.1254936
##
## Formula: ~1 | subjectid %in% abcd_site
## (Intercept) Residual
## StdDev: 1.099871 1.058043
##
## Variance function:
## Structure: fixed weights
## Formula: ~invwt
## Fixed effects: cbcl_scr_syn_external_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl
## Value Std.Error DF t-value
## (Intercept) 0.9429491 0.04128323 14511 22.840969
## interview_age.c9.y -0.0388390 0.00472466 14511 -8.220496
## race_ethnicity.blHispanic -0.0658765 0.03973144 9309 -1.658045
## race_ethnicity.blBlack -0.1128041 0.04416806 9309 -2.553975
## race_ethnicity.blOther -0.0418034 0.04070132 9309 -1.027076
## high.educ.blBachelor 0.1087898 0.03415121 9309 3.185532
## high.educ.blSome College 0.1948234 0.03890717 9309 5.007390
## high.educ.blHS Diploma/GED 0.0686531 0.05457601 9309 1.257936
## high.educ.bl< HS Diploma 0.0979019 0.07026626 9309 1.393299
## prnt.empl.blStay at Home Parent 0.0116881 0.03457543 9309 0.338048
## prnt.empl.blUnemployed 0.1983548 0.05599885 9309 3.542122
## prnt.empl.blOther 0.1971107 0.04941824 9309 3.988622
## neighb_phenx_avg_p.bl.cm -0.1247092 0.01428901 9309 -8.727629
## overall.income.bl[>=50K & <100K] 0.1264413 0.03450719 9309 3.664202
## overall.income.bl[<50k] 0.2620838 0.04312837 9309 6.076832
## overall.income.bl[Don't Know or Refuse] 0.1639139 0.05410510 9309 3.029547
## sex.blFemale -0.2978009 0.02525140 9309 -11.793441
## p-value
## (Intercept) 0.0000
## interview_age.c9.y 0.0000
## race_ethnicity.blHispanic 0.0973
## race_ethnicity.blBlack 0.0107
## race_ethnicity.blOther 0.3044
## high.educ.blBachelor 0.0014
## high.educ.blSome College 0.0000
## high.educ.blHS Diploma/GED 0.2084
## high.educ.bl< HS Diploma 0.1636
## prnt.empl.blStay at Home Parent 0.7353
## prnt.empl.blUnemployed 0.0004
## prnt.empl.blOther 0.0001
## neighb_phenx_avg_p.bl.cm 0.0000
## overall.income.bl[>=50K & <100K] 0.0002
## overall.income.bl[<50k] 0.0000
## overall.income.bl[Don't Know or Refuse] 0.0025
## sex.blFemale 0.0000
## Correlation:
## (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y -0.202
## race_ethnicity.blHispanic -0.161 0.006
## race_ethnicity.blBlack -0.141 0.011 0.351
## race_ethnicity.blOther -0.184 0.004 0.285 0.260
## high.educ.blBachelor -0.282 -0.002 -0.021 -0.014 -0.002
## high.educ.blSome College -0.195 0.003 -0.112 -0.085 -0.025
## high.educ.blHS Diploma/GED -0.122 0.007 -0.145 -0.148 -0.009
## high.educ.bl< HS Diploma -0.078 0.001 -0.173 -0.080 -0.013
## prnt.empl.blStay at Home Parent -0.140 0.009 0.042 0.092 0.017
## prnt.empl.blUnemployed -0.053 0.003 0.008 -0.042 0.010
## prnt.empl.blOther -0.061 0.003 0.041 0.011 -0.011
## neighb_phenx_avg_p.bl.cm -0.141 -0.004 0.046 0.152 0.049
## overall.income.bl[>=50K & <100K] -0.223 -0.001 -0.091 -0.063 -0.012
## overall.income.bl[<50k] -0.141 0.000 -0.148 -0.186 -0.081
## overall.income.bl[Don't Know or Refuse] -0.125 0.000 -0.101 -0.127 -0.057
## sex.blFemale -0.288 0.005 -0.009 -0.019 -0.017
## hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College 0.461
## high.educ.blHS Diploma/GED 0.339 0.503
## high.educ.bl< HS Diploma 0.269 0.414 0.381
## prnt.empl.blStay at Home Parent -0.030 -0.015 -0.050 -0.096
## prnt.empl.blUnemployed -0.009 -0.010 -0.069 -0.099 0.149
## prnt.empl.blOther -0.013 -0.033 -0.014 -0.020 0.159
## neighb_phenx_avg_p.bl.cm -0.006 0.060 0.056 0.053 0.029
## overall.income.bl[>=50K & <100K] -0.175 -0.276 -0.174 -0.116 -0.029
## overall.income.bl[<50k] -0.160 -0.418 -0.367 -0.311 -0.053
## overall.income.bl[Don't Know or Refuse] -0.100 -0.254 -0.241 -0.221 -0.076
## sex.blFemale 0.013 0.022 0.014 -0.005 -0.006
## prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther 0.134
## neighb_phenx_avg_p.bl.cm 0.024 0.004
## overall.income.bl[>=50K & <100K] -0.014 -0.049 0.084
## overall.income.bl[<50k] -0.101 -0.139 0.158 0.508
## overall.income.bl[Don't Know or Refuse] -0.079 -0.099 0.087 0.364 0.489
## sex.blFemale 0.018 0.017 0.028 -0.006 -0.007
## o..KoR
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther
## neighb_phenx_avg_p.bl.cm
## overall.income.bl[>=50K & <100K]
## overall.income.bl[<50k]
## overall.income.bl[Don't Know or Refuse]
## sex.blFemale 0.007
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.1969701 -0.7289727 -0.2621957 0.3860818 4.4722136
##
## Number of Observations: 23857
## Number of Groups:
## abcd_site subjectid %in% abcd_site
## 21 9345
summary(external_zinb_r$zi.fit)
## Linear mixed-effects model fit by maximum likelihood
## Data: data
## AIC BIC logLik
## NA NA NA
##
## Random effects:
## Formula: ~1 | abcd_site
## (Intercept) Residual
## StdDev: 0.2626422 0.5426526
##
## Variance function:
## Structure: fixed weights
## Formula: ~invwt
## Fixed effects: zp ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl
## Value Std.Error DF t-value
## (Intercept) -4.099752 0.08505026 23820 -48.20388
## interview_age.c9.y 0.214792 0.01927783 23820 11.14192
## race_ethnicity.blHispanic 0.219816 0.06303372 23820 3.48728
## race_ethnicity.blBlack 0.607992 0.06540608 23820 9.29565
## race_ethnicity.blOther 0.376809 0.06093286 23820 6.18400
## high.educ.blBachelor 0.107057 0.05363891 23820 1.99588
## high.educ.blSome College 0.148441 0.06230576 23820 2.38247
## high.educ.blHS Diploma/GED 0.415366 0.08145102 23820 5.09959
## high.educ.bl< HS Diploma 0.546205 0.09850925 23820 5.54471
## prnt.empl.blStay at Home Parent 0.025767 0.05387006 23820 0.47832
## prnt.empl.blUnemployed 0.020936 0.08281601 23820 0.25280
## prnt.empl.blOther -0.298640 0.08717990 23820 -3.42556
## neighb_phenx_avg_p.bl.cm 0.200864 0.02304520 23820 8.71610
## overall.income.bl[>=50K & <100K] -0.202154 0.05599558 23820 -3.61018
## overall.income.bl[<50k] -0.158909 0.06768490 23820 -2.34777
## overall.income.bl[Don't Know or Refuse] 0.175260 0.07675483 23820 2.28338
## sex.blFemale 0.226257 0.03896374 23820 5.80687
## p-value
## (Intercept) 0.0000
## interview_age.c9.y 0.0000
## race_ethnicity.blHispanic 0.0005
## race_ethnicity.blBlack 0.0000
## race_ethnicity.blOther 0.0000
## high.educ.blBachelor 0.0460
## high.educ.blSome College 0.0172
## high.educ.blHS Diploma/GED 0.0000
## high.educ.bl< HS Diploma 0.0000
## prnt.empl.blStay at Home Parent 0.6324
## prnt.empl.blUnemployed 0.8004
## prnt.empl.blOther 0.0006
## neighb_phenx_avg_p.bl.cm 0.0000
## overall.income.bl[>=50K & <100K] 0.0003
## overall.income.bl[<50k] 0.0189
## overall.income.bl[Don't Know or Refuse] 0.0224
## sex.blFemale 0.0000
## Correlation:
## (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y -0.464
## race_ethnicity.blHispanic -0.153 0.014
## race_ethnicity.blBlack -0.154 0.033 0.403
## race_ethnicity.blOther -0.180 0.006 0.333 0.304
## high.educ.blBachelor -0.231 -0.007 -0.007 -0.004 0.013
## high.educ.blSome College -0.160 0.007 -0.117 -0.092 -0.008
## high.educ.blHS Diploma/GED -0.115 0.023 -0.156 -0.152 0.002
## high.educ.bl< HS Diploma -0.080 0.010 -0.176 -0.091 0.000
## prnt.empl.blStay at Home Parent -0.117 0.022 0.047 0.105 0.021
## prnt.empl.blUnemployed -0.048 0.009 0.013 -0.033 0.012
## prnt.empl.blOther -0.040 0.008 0.042 0.002 -0.013
## neighb_phenx_avg_p.bl.cm -0.137 -0.001 0.037 0.146 0.038
## overall.income.bl[>=50K & <100K] -0.148 -0.001 -0.099 -0.082 -0.022
## overall.income.bl[<50k] -0.096 -0.001 -0.146 -0.204 -0.082
## overall.income.bl[Don't Know or Refuse] -0.094 0.008 -0.110 -0.142 -0.066
## sex.blFemale -0.252 0.015 -0.003 -0.017 -0.011
## hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College 0.460
## high.educ.blHS Diploma/GED 0.363 0.539
## high.educ.bl< HS Diploma 0.306 0.470 0.465
## prnt.empl.blStay at Home Parent -0.031 -0.020 -0.058 -0.116
## prnt.empl.blUnemployed -0.016 -0.009 -0.078 -0.116 0.163
## prnt.empl.blOther -0.018 -0.027 -0.010 -0.028 0.140
## neighb_phenx_avg_p.bl.cm -0.001 0.053 0.052 0.060 0.030
## overall.income.bl[>=50K & <100K] -0.159 -0.284 -0.189 -0.136 -0.022
## overall.income.bl[<50k] -0.150 -0.413 -0.397 -0.357 -0.048
## overall.income.bl[Don't Know or Refuse] -0.108 -0.285 -0.289 -0.271 -0.092
## sex.blFemale 0.013 0.017 0.012 -0.004 0.004
## prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther 0.124
## neighb_phenx_avg_p.bl.cm 0.030 -0.004
## overall.income.bl[>=50K & <100K] -0.012 -0.041 0.076
## overall.income.bl[<50k] -0.088 -0.117 0.139 0.490
## overall.income.bl[Don't Know or Refuse] -0.083 -0.099 0.089 0.388 0.546
## sex.blFemale 0.030 0.012 0.025 -0.006 -0.004
## o..KoR
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther
## neighb_phenx_avg_p.bl.cm
## overall.income.bl[>=50K & <100K]
## overall.income.bl[<50k]
## overall.income.bl[Don't Know or Refuse]
## sex.blFemale 0.000
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -0.7850906 -0.3556462 -0.2854172 0.2325046 17.3825587
##
## Number of Observations: 23857
## Number of Groups: 21
anova(external_zinb_r)
## numDF denDF F-value p-value
## (Intercept) 1 14511 966.2379 <.0001
## interview_age.c9.y 1 14511 69.8714 <.0001
## race_ethnicity.bl 3 9309 6.2238 3e-04
## high.educ.bl 4 9309 31.8134 <.0001
## prnt.empl.bl 3 9309 15.2470 <.0001
## neighb_phenx_avg_p.bl.cm 1 9309 89.7652 <.0001
## overall.income.bl 3 9309 12.1165 <.0001
## sex.bl 1 9309 139.0852 <.0001
VarCorr(external_zinb_r)
## Variance StdDev
## abcd_site = pdLogChol(1)
## (Intercept) 0.01574865 0.1254936
## subjectid = pdLogChol(1)
## (Intercept) 1.20971568 1.0998708
## Residual 1.11945536 1.0580432
#Check outlier/residuals with this df
external_res <- df_cc
external_res$level1_resid.raw <- residuals(external_zinb_r)
external_res$level1_resid.pearson <- residuals(external_zinb_r, type="pearson")
#Add predicted values (Yhat)
external_res$cbcl_scr_syn_external_r_predicted <- predict(external_zinb_r,external_res,type="response")
#Incidence
external_res$incidence <- estimate.probability(external_res$cbcl_scr_syn_external_r, method="empirical")
#Plotting histogram of residuals, but may be skewed since using ZINB, so make sure to check below plots
hist(external_res$level1_resid.pearson)
### Incidence vs. X’s Plots
#age
ggplot(external_res,aes(incidence,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
### Residuals vs Y (CBCL Outcome) Plot
plot(external_res$level1_resid.pearson, external_res$cbcl_scr_syn_external_r)
### Residuals vs Yhat Plot
plot(external_res$level1_resid.pearson, external_res$cbcl_scr_syn_external_r_predicted)
### Residuals vs Row Plot
plot(as.numeric(rownames(external_res)),external_res$level1_resid.pearson)
### Residuals vs X’s Plots
#age
ggplot(external_res,aes(level1_resid.pearson,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
anxdep_zinb_r <- glmm.zinb(cbcl_scr_syn_anxdep_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, random = ~1|abcd_site/subjectid,
zi_fixed = ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, zi_random = ~1|abcd_site, data = df_cc)
## Computational iterations: 11
## Computational time: 1.398 minutes
summary(anxdep_zinb_r)
## Linear mixed-effects model fit by maximum likelihood
## Data: df_cc
## AIC BIC logLik
## NA NA NA
##
## Random effects:
## Formula: ~1 | abcd_site
## (Intercept)
## StdDev: 0.1121779
##
## Formula: ~1 | subjectid %in% abcd_site
## (Intercept) Residual
## StdDev: 0.9726568 0.9392575
##
## Variance function:
## Structure: fixed weights
## Formula: ~invwt
## Fixed effects: cbcl_scr_syn_anxdep_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl
## Value Std.Error DF t-value
## (Intercept) 0.5937127 0.03739623 14511 15.876270
## interview_age.c9.y -0.0243781 0.00506976 14511 -4.808528
## race_ethnicity.blHispanic -0.0249894 0.03595399 9309 -0.695038
## race_ethnicity.blBlack -0.4486266 0.04109866 9309 -10.915845
## race_ethnicity.blOther -0.1019393 0.03675753 9309 -2.773289
## high.educ.blBachelor -0.0277787 0.03066520 9309 -0.905872
## high.educ.blSome College -0.0537389 0.03524909 9309 -1.524548
## high.educ.blHS Diploma/GED -0.2846188 0.05037691 9309 -5.649788
## high.educ.bl< HS Diploma -0.2767381 0.06496771 9309 -4.259625
## prnt.empl.blStay at Home Parent 0.0402005 0.03126878 9309 1.285645
## prnt.empl.blUnemployed 0.1838573 0.05155648 9309 3.566134
## prnt.empl.blOther 0.1508819 0.04528546 9309 3.331796
## neighb_phenx_avg_p.bl.cm -0.1080045 0.01304065 9309 -8.282143
## overall.income.bl[>=50K & <100K] 0.1111251 0.03105252 9309 3.578617
## overall.income.bl[<50k] 0.1582101 0.03925333 9309 4.030488
## overall.income.bl[Don't Know or Refuse] 0.0311890 0.04956838 9309 0.629212
## sex.blFemale 0.0552329 0.02286829 9309 2.415260
## p-value
## (Intercept) 0.0000
## interview_age.c9.y 0.0000
## race_ethnicity.blHispanic 0.4870
## race_ethnicity.blBlack 0.0000
## race_ethnicity.blOther 0.0056
## high.educ.blBachelor 0.3650
## high.educ.blSome College 0.1274
## high.educ.blHS Diploma/GED 0.0000
## high.educ.bl< HS Diploma 0.0000
## prnt.empl.blStay at Home Parent 0.1986
## prnt.empl.blUnemployed 0.0004
## prnt.empl.blOther 0.0009
## neighb_phenx_avg_p.bl.cm 0.0000
## overall.income.bl[>=50K & <100K] 0.0003
## overall.income.bl[<50k] 0.0001
## overall.income.bl[Don't Know or Refuse] 0.5292
## sex.blFemale 0.0157
## Correlation:
## (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y -0.240
## race_ethnicity.blHispanic -0.159 0.008
## race_ethnicity.blBlack -0.139 0.012 0.340
## race_ethnicity.blOther -0.181 0.004 0.282 0.250
## high.educ.blBachelor -0.276 -0.002 -0.018 -0.011 0.000
## high.educ.blSome College -0.188 0.003 -0.110 -0.082 -0.024
## high.educ.blHS Diploma/GED -0.116 0.008 -0.142 -0.142 -0.006
## high.educ.bl< HS Diploma -0.074 0.002 -0.171 -0.071 -0.011
## prnt.empl.blStay at Home Parent -0.140 0.011 0.042 0.092 0.019
## prnt.empl.blUnemployed -0.055 0.004 0.008 -0.041 0.009
## prnt.empl.blOther -0.062 0.004 0.040 0.009 -0.011
## neighb_phenx_avg_p.bl.cm -0.142 -0.004 0.045 0.155 0.046
## overall.income.bl[>=50K & <100K] -0.219 0.000 -0.094 -0.061 -0.015
## overall.income.bl[<50k] -0.137 0.000 -0.152 -0.184 -0.083
## overall.income.bl[Don't Know or Refuse] -0.120 -0.001 -0.100 -0.123 -0.063
## sex.blFemale -0.298 0.005 -0.008 -0.018 -0.018
## hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College 0.453
## high.educ.blHS Diploma/GED 0.327 0.488
## high.educ.bl< HS Diploma 0.259 0.402 0.367
## prnt.empl.blStay at Home Parent -0.031 -0.015 -0.053 -0.095
## prnt.empl.blUnemployed -0.008 -0.009 -0.069 -0.098 0.147
## prnt.empl.blOther -0.013 -0.033 -0.011 -0.020 0.157
## neighb_phenx_avg_p.bl.cm -0.007 0.062 0.058 0.055 0.029
## overall.income.bl[>=50K & <100K] -0.175 -0.277 -0.169 -0.112 -0.031
## overall.income.bl[<50k] -0.160 -0.419 -0.363 -0.307 -0.052
## overall.income.bl[Don't Know or Refuse] -0.100 -0.249 -0.234 -0.216 -0.076
## sex.blFemale 0.015 0.023 0.015 -0.003 -0.006
## prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther 0.131
## neighb_phenx_avg_p.bl.cm 0.024 0.005
## overall.income.bl[>=50K & <100K] -0.015 -0.049 0.085
## overall.income.bl[<50k] -0.102 -0.140 0.156 0.503
## overall.income.bl[Don't Know or Refuse] -0.078 -0.097 0.087 0.355 0.476
## sex.blFemale 0.020 0.020 0.027 -0.006 -0.007
## o..KoR
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther
## neighb_phenx_avg_p.bl.cm
## overall.income.bl[>=50K & <100K]
## overall.income.bl[<50k]
## overall.income.bl[Don't Know or Refuse]
## sex.blFemale 0.007
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.8041137 -0.7602766 -0.2203188 0.4366498 4.4144158
##
## Number of Observations: 23857
## Number of Groups:
## abcd_site subjectid %in% abcd_site
## 21 9345
summary(anxdep_zinb_r$zi.fit)
## Linear mixed-effects model fit by maximum likelihood
## Data: data
## AIC BIC logLik
## NA NA NA
##
## Random effects:
## Formula: ~1 | abcd_site
## (Intercept) Residual
## StdDev: 0.3811096 0.4309838
##
## Variance function:
## Structure: fixed weights
## Formula: ~invwt
## Fixed effects: zp ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl
## Value Std.Error DF t-value
## (Intercept) -5.007919 0.10640506 23820 -47.06467
## interview_age.c9.y 0.352666 0.01871702 23820 18.84198
## race_ethnicity.blHispanic 0.069959 0.06425351 23820 1.08879
## race_ethnicity.blBlack 1.089479 0.05885231 23820 18.51208
## race_ethnicity.blOther 0.342804 0.06465785 23820 5.30182
## high.educ.blBachelor 0.347615 0.05660319 23820 6.14127
## high.educ.blSome College 0.301698 0.06302923 23820 4.78663
## high.educ.blHS Diploma/GED 0.515374 0.07714294 23820 6.68077
## high.educ.bl< HS Diploma 0.927745 0.08551122 23820 10.84940
## prnt.empl.blStay at Home Parent 0.016360 0.05344281 23820 0.30612
## prnt.empl.blUnemployed -0.064903 0.07214538 23820 -0.89961
## prnt.empl.blOther 0.079382 0.06762996 23820 1.17376
## neighb_phenx_avg_p.bl.cm 0.300654 0.02156670 23820 13.94068
## overall.income.bl[>=50K & <100K] -0.174903 0.06026414 23820 -2.90228
## overall.income.bl[<50k] 0.309110 0.06461715 23820 4.78371
## overall.income.bl[Don't Know or Refuse] 0.338358 0.07467982 23820 4.53079
## sex.blFemale -0.321314 0.03823591 23820 -8.40346
## p-value
## (Intercept) 0.0000
## interview_age.c9.y 0.0000
## race_ethnicity.blHispanic 0.2763
## race_ethnicity.blBlack 0.0000
## race_ethnicity.blOther 0.0000
## high.educ.blBachelor 0.0000
## high.educ.blSome College 0.0000
## high.educ.blHS Diploma/GED 0.0000
## high.educ.bl< HS Diploma 0.0000
## prnt.empl.blStay at Home Parent 0.7595
## prnt.empl.blUnemployed 0.3683
## prnt.empl.blOther 0.2405
## neighb_phenx_avg_p.bl.cm 0.0000
## overall.income.bl[>=50K & <100K] 0.0037
## overall.income.bl[<50k] 0.0000
## overall.income.bl[Don't Know or Refuse] 0.0000
## sex.blFemale 0.0000
## Correlation:
## (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y -0.387
## race_ethnicity.blHispanic -0.126 0.012
## race_ethnicity.blBlack -0.151 0.043 0.490
## race_ethnicity.blOther -0.153 0.005 0.357 0.365
## high.educ.blBachelor -0.228 -0.003 -0.015 0.000 0.009
## high.educ.blSome College -0.167 0.015 -0.106 -0.093 -0.006
## high.educ.blHS Diploma/GED -0.131 0.028 -0.142 -0.146 0.003
## high.educ.bl< HS Diploma -0.103 0.014 -0.177 -0.104 -0.008
## prnt.empl.blStay at Home Parent -0.100 0.026 0.047 0.105 0.026
## prnt.empl.blUnemployed -0.038 0.011 0.014 -0.014 0.016
## prnt.empl.blOther -0.040 0.010 0.059 0.012 -0.003
## neighb_phenx_avg_p.bl.cm -0.114 0.006 0.027 0.157 0.039
## overall.income.bl[>=50K & <100K] -0.137 -0.005 -0.107 -0.101 -0.028
## overall.income.bl[<50k] -0.104 0.001 -0.168 -0.223 -0.087
## overall.income.bl[Don't Know or Refuse] -0.099 0.013 -0.121 -0.155 -0.069
## sex.blFemale -0.149 0.013 -0.003 -0.032 -0.020
## hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College 0.543
## high.educ.blHS Diploma/GED 0.454 0.623
## high.educ.bl< HS Diploma 0.414 0.577 0.558
## prnt.empl.blStay at Home Parent -0.019 -0.009 -0.050 -0.120
## prnt.empl.blUnemployed -0.022 -0.003 -0.082 -0.118 0.185
## prnt.empl.blOther -0.014 -0.029 -0.023 -0.028 0.175
## neighb_phenx_avg_p.bl.cm 0.005 0.051 0.046 0.058 0.036
## overall.income.bl[>=50K & <100K] -0.158 -0.272 -0.194 -0.157 -0.019
## overall.income.bl[<50k] -0.167 -0.419 -0.392 -0.379 -0.050
## overall.income.bl[Don't Know or Refuse] -0.121 -0.293 -0.283 -0.272 -0.087
## sex.blFemale 0.009 0.009 0.013 -0.012 0.015
## prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther 0.162
## neighb_phenx_avg_p.bl.cm 0.033 -0.013
## overall.income.bl[>=50K & <100K] -0.010 -0.040 0.057
## overall.income.bl[<50k] -0.082 -0.129 0.128 0.550
## overall.income.bl[Don't Know or Refuse] -0.081 -0.114 0.071 0.435 0.615
## sex.blFemale 0.037 0.017 0.016 0.001 -0.002
## o..KoR
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther
## neighb_phenx_avg_p.bl.cm
## overall.income.bl[>=50K & <100K]
## overall.income.bl[<50k]
## overall.income.bl[Don't Know or Refuse]
## sex.blFemale -0.003
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.4245472 -0.3355471 -0.2311088 0.2379168 38.4888000
##
## Number of Observations: 23857
## Number of Groups: 21
anova(anxdep_zinb_r)
## numDF denDF F-value p-value
## (Intercept) 1 14511 394.8857 <.0001
## interview_age.c9.y 1 14511 22.2878 <.0001
## race_ethnicity.bl 3 9309 33.3004 <.0001
## high.educ.bl 4 9309 5.8283 0.0001
## prnt.empl.bl 3 9309 10.2481 <.0001
## neighb_phenx_avg_p.bl.cm 1 9309 82.5352 <.0001
## overall.income.bl 3 9309 7.6171 <.0001
## sex.bl 1 9309 5.8335 0.0157
#Check outlier/residuals with this df
anxdep_res <- df_cc
anxdep_res$level1_resid.raw <- residuals(anxdep_zinb_r)
anxdep_res$level1_resid.pearson <- residuals(anxdep_zinb_r, type="pearson")
#Add predicted values (Yhat)
anxdep_res$cbcl_scr_syn_anxdep_r_predicted <- predict(anxdep_zinb_r,anxdep_res,type="response")
#Incidence
anxdep_res$incidence <- estimate.probability(anxdep_res$cbcl_scr_syn_anxdep_r, method="empirical")
#Plotting histogram of residuals, but may be skewed since using ZINB, so make sure to check below plots
hist(anxdep_res$level1_resid.pearson)
### Incidence vs. X’s Plots
#age
ggplot(anxdep_res,aes(incidence,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
### Residuals vs Y (CBCL Outcome) Plot
plot(anxdep_res$level1_resid.pearson, anxdep_res$cbcl_scr_syn_anxdep_r)
### Residuals vs Yhat Plot
plot(anxdep_res$level1_resid.pearson, anxdep_res$cbcl_scr_syn_anxdep_r_predicted)
### Residuals vs Row Plot
plot(as.numeric(rownames(anxdep_res)),anxdep_res$level1_resid.pearson)
### Residuals vs X’s Plots
#age
ggplot(anxdep_res,aes(level1_resid.pearson,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
withdep_zinb_r <- glmm.zinb(cbcl_scr_syn_withdep_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, random = ~1|abcd_site/subjectid,
zi_fixed = ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, zi_random = ~1|abcd_site, data = df_cc)
## Computational iterations: 15
## Computational time: 1.983 minutes
summary(withdep_zinb_r)
## Linear mixed-effects model fit by maximum likelihood
## Data: df_cc
## AIC BIC logLik
## NA NA NA
##
## Random effects:
## Formula: ~1 | abcd_site
## (Intercept)
## StdDev: 0.1118689
##
## Formula: ~1 | subjectid %in% abcd_site
## (Intercept) Residual
## StdDev: 1.206147 0.8065221
##
## Variance function:
## Structure: fixed weights
## Formula: ~invwt
## Fixed effects: cbcl_scr_syn_withdep_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl
## Value Std.Error DF t-value
## (Intercept) -0.8180659 0.04435421 14511 -18.443932
## interview_age.c9.y 0.0936966 0.00648940 14511 14.438407
## race_ethnicity.blHispanic -0.0168186 0.04588625 9309 -0.366528
## race_ethnicity.blBlack -0.3142541 0.05202813 9309 -6.040081
## race_ethnicity.blOther 0.0169591 0.04710236 9309 0.360047
## high.educ.blBachelor 0.0774744 0.04008315 9309 1.932843
## high.educ.blSome College 0.1951026 0.04545551 9309 4.292166
## high.educ.blHS Diploma/GED 0.0918854 0.06365579 9309 1.443472
## high.educ.bl< HS Diploma 0.1290412 0.08120372 9309 1.589104
## prnt.empl.blStay at Home Parent 0.0784586 0.04011413 9309 1.955884
## prnt.empl.blUnemployed 0.2379993 0.06480057 9309 3.672796
## prnt.empl.blOther 0.2794945 0.05706447 9309 4.897872
## neighb_phenx_avg_p.bl.cm -0.1416896 0.01656597 9309 -8.553053
## overall.income.bl[>=50K & <100K] 0.1598888 0.04039175 9309 3.958453
## overall.income.bl[<50k] 0.2802247 0.05024633 9309 5.577019
## overall.income.bl[Don't Know or Refuse] 0.2424366 0.06309744 9309 3.842257
## sex.blFemale -0.0720659 0.02947982 9309 -2.444584
## p-value
## (Intercept) 0.0000
## interview_age.c9.y 0.0000
## race_ethnicity.blHispanic 0.7140
## race_ethnicity.blBlack 0.0000
## race_ethnicity.blOther 0.7188
## high.educ.blBachelor 0.0533
## high.educ.blSome College 0.0000
## high.educ.blHS Diploma/GED 0.1489
## high.educ.bl< HS Diploma 0.1121
## prnt.empl.blStay at Home Parent 0.0505
## prnt.empl.blUnemployed 0.0002
## prnt.empl.blOther 0.0000
## neighb_phenx_avg_p.bl.cm 0.0000
## overall.income.bl[>=50K & <100K] 0.0001
## overall.income.bl[<50k] 0.0000
## overall.income.bl[Don't Know or Refuse] 0.0001
## sex.blFemale 0.0145
## Correlation:
## (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y -0.272
## race_ethnicity.blHispanic -0.171 0.008
## race_ethnicity.blBlack -0.151 0.012 0.357
## race_ethnicity.blOther -0.200 0.005 0.287 0.259
## high.educ.blBachelor -0.312 -0.002 -0.019 -0.012 -0.001
## high.educ.blSome College -0.215 0.003 -0.115 -0.089 -0.024
## high.educ.blHS Diploma/GED -0.136 0.010 -0.148 -0.150 -0.008
## high.educ.bl< HS Diploma -0.089 0.002 -0.180 -0.080 -0.011
## prnt.empl.blStay at Home Parent -0.154 0.012 0.039 0.091 0.016
## prnt.empl.blUnemployed -0.059 0.004 0.005 -0.044 0.008
## prnt.empl.blOther -0.070 0.005 0.042 0.010 -0.009
## neighb_phenx_avg_p.bl.cm -0.151 -0.006 0.048 0.157 0.049
## overall.income.bl[>=50K & <100K] -0.248 0.000 -0.091 -0.060 -0.009
## overall.income.bl[<50k] -0.158 0.001 -0.152 -0.185 -0.080
## overall.income.bl[Don't Know or Refuse] -0.137 -0.001 -0.108 -0.129 -0.064
## sex.blFemale -0.320 0.006 -0.009 -0.015 -0.018
## hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College 0.464
## high.educ.blHS Diploma/GED 0.341 0.508
## high.educ.bl< HS Diploma 0.273 0.421 0.388
## prnt.empl.blStay at Home Parent -0.028 -0.014 -0.051 -0.095
## prnt.empl.blUnemployed -0.006 -0.007 -0.068 -0.099 0.152
## prnt.empl.blOther -0.010 -0.032 -0.008 -0.017 0.162
## neighb_phenx_avg_p.bl.cm -0.007 0.061 0.058 0.056 0.029
## overall.income.bl[>=50K & <100K] -0.174 -0.276 -0.172 -0.117 -0.031
## overall.income.bl[<50k] -0.162 -0.419 -0.370 -0.313 -0.053
## overall.income.bl[Don't Know or Refuse] -0.102 -0.256 -0.243 -0.227 -0.078
## sex.blFemale 0.017 0.024 0.015 -0.002 -0.005
## prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther 0.135
## neighb_phenx_avg_p.bl.cm 0.020 0.006
## overall.income.bl[>=50K & <100K] -0.017 -0.051 0.082
## overall.income.bl[<50k] -0.100 -0.142 0.156 0.512
## overall.income.bl[Don't Know or Refuse] -0.080 -0.100 0.089 0.369 0.496
## sex.blFemale 0.018 0.020 0.030 -0.008 -0.006
## o..KoR
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther
## neighb_phenx_avg_p.bl.cm
## overall.income.bl[>=50K & <100K]
## overall.income.bl[<50k]
## overall.income.bl[Don't Know or Refuse]
## sex.blFemale 0.007
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.7594639 -0.6292970 -0.5046082 0.3868743 4.6633112
##
## Number of Observations: 23857
## Number of Groups:
## abcd_site subjectid %in% abcd_site
## 21 9345
summary(withdep_zinb_r$zi.fit)
## Linear mixed-effects model fit by maximum likelihood
## Data: data
## AIC BIC logLik
## NA NA NA
##
## Random effects:
## Formula: ~1 | abcd_site
## (Intercept) Residual
## StdDev: 0.5317109 0.2651273
##
## Variance function:
## Structure: fixed weights
## Formula: ~invwt
## Fixed effects: zp ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl
## Value Std.Error DF t-value
## (Intercept) -3.417296 0.12006310 23820 -28.46250
## interview_age.c9.y -0.203403 0.01062229 23820 -19.14873
## race_ethnicity.blHispanic 0.168921 0.03310490 23820 5.10261
## race_ethnicity.blBlack 0.582184 0.03568706 23820 16.31358
## race_ethnicity.blOther 0.012980 0.03537404 23820 0.36692
## high.educ.blBachelor 0.287193 0.02669407 23820 10.75867
## high.educ.blSome College 0.314541 0.03248792 23820 9.68179
## high.educ.blHS Diploma/GED 0.219772 0.04896009 23820 4.48880
## high.educ.bl< HS Diploma 0.664195 0.05823189 23820 11.40603
## prnt.empl.blStay at Home Parent -0.612322 0.03525756 23820 -17.36710
## prnt.empl.blUnemployed -0.139876 0.05008273 23820 -2.79290
## prnt.empl.blOther -0.033356 0.04308024 23820 -0.77428
## neighb_phenx_avg_p.bl.cm 0.460628 0.01395288 23820 33.01309
## overall.income.bl[>=50K & <100K] -0.493979 0.02931562 23820 -16.85036
## overall.income.bl[<50k] -0.831531 0.03886008 23820 -21.39807
## overall.income.bl[Don't Know or Refuse] 0.316985 0.03751395 23820 8.44979
## sex.blFemale 0.218279 0.02088107 23820 10.45342
## p-value
## (Intercept) 0.0000
## interview_age.c9.y 0.0000
## race_ethnicity.blHispanic 0.0000
## race_ethnicity.blBlack 0.0000
## race_ethnicity.blOther 0.7137
## high.educ.blBachelor 0.0000
## high.educ.blSome College 0.0000
## high.educ.blHS Diploma/GED 0.0000
## high.educ.bl< HS Diploma 0.0000
## prnt.empl.blStay at Home Parent 0.0000
## prnt.empl.blUnemployed 0.0052
## prnt.empl.blOther 0.4388
## neighb_phenx_avg_p.bl.cm 0.0000
## overall.income.bl[>=50K & <100K] 0.0000
## overall.income.bl[<50k] 0.0000
## overall.income.bl[Don't Know or Refuse] 0.0000
## sex.blFemale 0.0000
## Correlation:
## (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y -0.144
## race_ethnicity.blHispanic -0.053 0.015
## race_ethnicity.blBlack -0.049 0.031 0.346
## race_ethnicity.blOther -0.058 0.010 0.276 0.226
## high.educ.blBachelor -0.086 -0.017 -0.006 -0.001 0.018
## high.educ.blSome College -0.060 -0.004 -0.115 -0.107 0.009
## high.educ.blHS Diploma/GED -0.036 0.016 -0.135 -0.144 0.005
## high.educ.bl< HS Diploma -0.026 0.009 -0.160 -0.079 0.005
## prnt.empl.blStay at Home Parent -0.032 0.016 0.038 0.084 0.018
## prnt.empl.blUnemployed -0.016 0.004 0.013 -0.027 0.017
## prnt.empl.blOther -0.012 0.007 0.038 -0.013 -0.023
## neighb_phenx_avg_p.bl.cm -0.065 -0.004 0.024 0.117 0.036
## overall.income.bl[>=50K & <100K] -0.043 0.005 -0.103 -0.090 -0.025
## overall.income.bl[<50k] -0.026 -0.002 -0.126 -0.198 -0.062
## overall.income.bl[Don't Know or Refuse] -0.029 0.000 -0.113 -0.173 -0.067
## sex.blFemale -0.092 0.006 -0.005 -0.017 -0.009
## hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College 0.441
## high.educ.blHS Diploma/GED 0.302 0.441
## high.educ.bl< HS Diploma 0.260 0.398 0.356
## prnt.empl.blStay at Home Parent -0.037 -0.021 -0.046 -0.104
## prnt.empl.blUnemployed -0.024 -0.016 -0.065 -0.130 0.112
## prnt.empl.blOther -0.028 -0.042 -0.025 -0.045 0.115
## neighb_phenx_avg_p.bl.cm -0.005 0.046 0.033 0.047 0.008
## overall.income.bl[>=50K & <100K] -0.140 -0.274 -0.161 -0.109 -0.018
## overall.income.bl[<50k] -0.113 -0.360 -0.324 -0.321 -0.027
## overall.income.bl[Don't Know or Refuse] -0.093 -0.279 -0.271 -0.273 -0.076
## sex.blFemale 0.008 0.019 0.007 -0.013 0.002
## prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther 0.116
## neighb_phenx_avg_p.bl.cm 0.032 -0.020
## overall.income.bl[>=50K & <100K] -0.010 -0.049 0.071
## overall.income.bl[<50k] -0.077 -0.115 0.116 0.403
## overall.income.bl[Don't Know or Refuse] -0.079 -0.108 0.074 0.364 0.489
## sex.blFemale 0.030 0.009 0.029 -0.020 -0.015
## o..KoR
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther
## neighb_phenx_avg_p.bl.cm
## overall.income.bl[>=50K & <100K]
## overall.income.bl[<50k]
## overall.income.bl[Don't Know or Refuse]
## sex.blFemale 0.000
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.6507402 -0.4949076 0.1660230 0.2645690 42.3666495
##
## Number of Observations: 23857
## Number of Groups: 21
anova(withdep_zinb_r)
## numDF denDF F-value p-value
## (Intercept) 1 14511 242.07620 <.0001
## interview_age.c9.y 1 14511 201.42562 <.0001
## race_ethnicity.bl 3 9309 8.11214 <.0001
## high.educ.bl 4 9309 28.37207 <.0001
## prnt.empl.bl 3 9309 18.09865 <.0001
## neighb_phenx_avg_p.bl.cm 1 9309 90.02855 <.0001
## overall.income.bl 3 9309 11.24198 <.0001
## sex.bl 1 9309 5.97599 0.0145
#Check outlier/residuals with this df
withdep_res <- df_cc
withdep_res$level1_resid.raw <- residuals(withdep_zinb_r)
withdep_res$level1_resid.pearson <- residuals(withdep_zinb_r, type="pearson")
#Add predicted values (Yhat)
withdep_res$cbcl_scr_syn_withdep_r_predicted <- predict(withdep_zinb_r,withdep_res,type="response")
#Incidence
withdep_res$incidence <- estimate.probability(withdep_res$cbcl_scr_syn_withdep_r, method="empirical")
#Plotting histogram of residuals, but may be skewed since using ZINB, so make sure to check below plots
hist(withdep_res$level1_resid.pearson)
### Incidence vs. X’s Plots
#age
ggplot(withdep_res,aes(incidence,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 7.4176e-05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 1.0709e-14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 1.3755e-09
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at 0
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 7.4176e-05
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 1.0709e-14
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 1.3755e-09
### Residuals vs Y (CBCL Outcome) Plot
plot(withdep_res$level1_resid.pearson, withdep_res$cbcl_scr_syn_withdep_r)
### Residuals vs Yhat Plot
plot(withdep_res$level1_resid.pearson, withdep_res$cbcl_scr_syn_withdep_r_predicted)
### Residuals vs Row Plot
plot(as.numeric(rownames(withdep_res)),withdep_res$level1_resid.pearson)
### Residuals vs X’s Plots
#age
ggplot(withdep_res,aes(level1_resid.pearson,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
attention_zinb_r <- glmm.zinb(cbcl_scr_syn_attention_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, random = ~1|abcd_site/subjectid,
zi_fixed = ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, zi_random = ~1|abcd_site, data = df_cc)
## Computational iterations: 14
## Computational time: 2.015 minutes
summary(attention_zinb_r)
## Linear mixed-effects model fit by maximum likelihood
## Data: df_cc
## AIC BIC logLik
## NA NA NA
##
## Random effects:
## Formula: ~1 | abcd_site
## (Intercept)
## StdDev: 0.1358172
##
## Formula: ~1 | subjectid %in% abcd_site
## (Intercept) Residual
## StdDev: 1.140061 0.9079128
##
## Variance function:
## Structure: fixed weights
## Formula: ~invwt
## Fixed effects: cbcl_scr_syn_attention_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl
## Value Std.Error DF t-value
## (Intercept) 0.6590844 0.04349792 14511 15.152091
## interview_age.c9.y -0.0353176 0.00462549 14511 -7.635430
## race_ethnicity.blHispanic -0.0593185 0.04124066 9309 -1.438350
## race_ethnicity.blBlack -0.0598611 0.04567308 9309 -1.310643
## race_ethnicity.blOther 0.0607076 0.04192187 9309 1.448112
## high.educ.blBachelor 0.1545420 0.03529460 9309 4.378632
## high.educ.blSome College 0.1952206 0.04032712 9309 4.840926
## high.educ.blHS Diploma/GED 0.0643164 0.05661169 9309 1.136097
## high.educ.bl< HS Diploma 0.0359965 0.07347570 9309 0.489911
## prnt.empl.blStay at Home Parent -0.0557609 0.03592310 9309 -1.552229
## prnt.empl.blUnemployed 0.1391686 0.05812674 9309 2.394227
## prnt.empl.blOther 0.1869133 0.05117346 9309 3.652545
## neighb_phenx_avg_p.bl.cm -0.1152415 0.01481732 9309 -7.777481
## overall.income.bl[>=50K & <100K] 0.0809051 0.03573678 9309 2.263918
## overall.income.bl[<50k] 0.1529989 0.04473905 9309 3.419808
## overall.income.bl[Don't Know or Refuse] 0.0469490 0.05612350 9309 0.836530
## sex.blFemale -0.4363546 0.02617693 9309 -16.669433
## p-value
## (Intercept) 0.0000
## interview_age.c9.y 0.0000
## race_ethnicity.blHispanic 0.1504
## race_ethnicity.blBlack 0.1900
## race_ethnicity.blOther 0.1476
## high.educ.blBachelor 0.0000
## high.educ.blSome College 0.0000
## high.educ.blHS Diploma/GED 0.2559
## high.educ.bl< HS Diploma 0.6242
## prnt.empl.blStay at Home Parent 0.1206
## prnt.empl.blUnemployed 0.0167
## prnt.empl.blOther 0.0003
## neighb_phenx_avg_p.bl.cm 0.0000
## overall.income.bl[>=50K & <100K] 0.0236
## overall.income.bl[<50k] 0.0006
## overall.income.bl[Don't Know or Refuse] 0.4029
## sex.blFemale 0.0000
## Correlation:
## (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y -0.188
## race_ethnicity.blHispanic -0.161 0.006
## race_ethnicity.blBlack -0.142 0.009 0.349
## race_ethnicity.blOther -0.183 0.003 0.284 0.260
## high.educ.blBachelor -0.280 -0.002 -0.016 -0.009 0.001
## high.educ.blSome College -0.192 0.002 -0.107 -0.079 -0.021
## high.educ.blHS Diploma/GED -0.119 0.006 -0.141 -0.144 -0.006
## high.educ.bl< HS Diploma -0.078 0.001 -0.168 -0.075 -0.010
## prnt.empl.blStay at Home Parent -0.136 0.008 0.043 0.091 0.016
## prnt.empl.blUnemployed -0.051 0.002 0.009 -0.042 0.009
## prnt.empl.blOther -0.060 0.004 0.041 0.009 -0.014
## neighb_phenx_avg_p.bl.cm -0.140 -0.004 0.043 0.153 0.048
## overall.income.bl[>=50K & <100K] -0.218 0.000 -0.092 -0.063 -0.012
## overall.income.bl[<50k] -0.136 0.000 -0.148 -0.188 -0.082
## overall.income.bl[Don't Know or Refuse] -0.120 -0.001 -0.101 -0.128 -0.059
## sex.blFemale -0.280 0.006 -0.006 -0.017 -0.016
## hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College 0.460
## high.educ.blHS Diploma/GED 0.338 0.503
## high.educ.bl< HS Diploma 0.265 0.411 0.379
## prnt.empl.blStay at Home Parent -0.026 -0.013 -0.050 -0.092
## prnt.empl.blUnemployed -0.009 -0.010 -0.069 -0.099 0.148
## prnt.empl.blOther -0.011 -0.033 -0.013 -0.020 0.158
## neighb_phenx_avg_p.bl.cm -0.005 0.062 0.058 0.057 0.027
## overall.income.bl[>=50K & <100K] -0.173 -0.278 -0.175 -0.115 -0.032
## overall.income.bl[<50k] -0.161 -0.421 -0.370 -0.311 -0.055
## overall.income.bl[Don't Know or Refuse] -0.101 -0.255 -0.241 -0.220 -0.079
## sex.blFemale 0.014 0.021 0.014 -0.003 -0.005
## prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther 0.133
## neighb_phenx_avg_p.bl.cm 0.021 0.002
## overall.income.bl[>=50K & <100K] -0.016 -0.050 0.085
## overall.income.bl[<50k] -0.102 -0.138 0.156 0.508
## overall.income.bl[Don't Know or Refuse] -0.079 -0.099 0.088 0.363 0.488
## sex.blFemale 0.019 0.020 0.030 -0.006 -0.008
## o..KoR
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther
## neighb_phenx_avg_p.bl.cm
## overall.income.bl[>=50K & <100K]
## overall.income.bl[<50k]
## overall.income.bl[Don't Know or Refuse]
## sex.blFemale 0.006
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.5134447 -0.7118173 -0.2810833 0.4195582 3.8913914
##
## Number of Observations: 23857
## Number of Groups:
## abcd_site subjectid %in% abcd_site
## 21 9345
summary(attention_zinb_r$zi.fit)
## Linear mixed-effects model fit by maximum likelihood
## Data: data
## AIC BIC logLik
## NA NA NA
##
## Random effects:
## Formula: ~1 | abcd_site
## (Intercept) Residual
## StdDev: 0.5725051 0.3970165
##
## Variance function:
## Structure: fixed weights
## Formula: ~invwt
## Fixed effects: zp ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl
## Value Std.Error DF t-value
## (Intercept) -5.592195 0.14585374 23820 -38.34112
## interview_age.c9.y 0.152881 0.02102163 23820 7.27254
## race_ethnicity.blHispanic 0.242567 0.07250235 23820 3.34564
## race_ethnicity.blBlack 0.799979 0.06953310 23820 11.50501
## race_ethnicity.blOther 0.456849 0.07149781 23820 6.38969
## high.educ.blBachelor 0.065373 0.06619424 23820 0.98760
## high.educ.blSome College 0.366416 0.07268933 23820 5.04085
## high.educ.blHS Diploma/GED 1.038075 0.08643505 23820 12.00988
## high.educ.bl< HS Diploma 2.022974 0.09075926 23820 22.28945
## prnt.empl.blStay at Home Parent 0.185903 0.05483332 23820 3.39034
## prnt.empl.blUnemployed -0.379432 0.09197028 23820 -4.12560
## prnt.empl.blOther -0.139438 0.08591830 23820 -1.62292
## neighb_phenx_avg_p.bl.cm 0.457926 0.02538887 23820 18.03647
## overall.income.bl[>=50K & <100K] -0.367172 0.06789232 23820 -5.40816
## overall.income.bl[<50k] -0.056680 0.07559964 23820 -0.74974
## overall.income.bl[Don't Know or Refuse] 0.099041 0.08592882 23820 1.15259
## sex.blFemale 0.744418 0.04397254 23820 16.92916
## p-value
## (Intercept) 0.0000
## interview_age.c9.y 0.0000
## race_ethnicity.blHispanic 0.0008
## race_ethnicity.blBlack 0.0000
## race_ethnicity.blOther 0.0000
## high.educ.blBachelor 0.3234
## high.educ.blSome College 0.0000
## high.educ.blHS Diploma/GED 0.0000
## high.educ.bl< HS Diploma 0.0000
## prnt.empl.blStay at Home Parent 0.0007
## prnt.empl.blUnemployed 0.0000
## prnt.empl.blOther 0.1046
## neighb_phenx_avg_p.bl.cm 0.0000
## overall.income.bl[>=50K & <100K] 0.0000
## overall.income.bl[<50k] 0.4534
## overall.income.bl[Don't Know or Refuse] 0.2491
## sex.blFemale 0.0000
## Correlation:
## (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y -0.293
## race_ethnicity.blHispanic -0.114 0.016
## race_ethnicity.blBlack -0.115 0.035 0.471
## race_ethnicity.blOther -0.130 0.011 0.376 0.370
## high.educ.blBachelor -0.167 -0.009 -0.005 0.008 0.017
## high.educ.blSome College -0.127 0.011 -0.100 -0.079 -0.009
## high.educ.blHS Diploma/GED -0.101 0.025 -0.154 -0.146 -0.004
## high.educ.bl< HS Diploma -0.092 0.016 -0.190 -0.113 -0.011
## prnt.empl.blStay at Home Parent -0.088 0.024 0.051 0.102 0.038
## prnt.empl.blUnemployed -0.032 0.008 0.018 -0.032 -0.004
## prnt.empl.blOther -0.029 0.013 0.049 -0.008 -0.021
## neighb_phenx_avg_p.bl.cm -0.104 0.001 0.025 0.128 0.032
## overall.income.bl[>=50K & <100K] -0.090 0.007 -0.091 -0.100 -0.046
## overall.income.bl[<50k] -0.068 0.003 -0.161 -0.223 -0.102
## overall.income.bl[Don't Know or Refuse] -0.064 0.025 -0.131 -0.164 -0.087
## sex.blFemale -0.200 0.018 0.002 -0.016 -0.011
## hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College 0.485
## high.educ.blHS Diploma/GED 0.416 0.618
## high.educ.bl< HS Diploma 0.400 0.613 0.639
## prnt.empl.blStay at Home Parent -0.031 -0.016 -0.048 -0.108
## prnt.empl.blUnemployed -0.018 -0.004 -0.060 -0.107 0.181
## prnt.empl.blOther -0.020 -0.019 -0.007 -0.047 0.176
## neighb_phenx_avg_p.bl.cm -0.011 0.028 0.033 0.068 0.043
## overall.income.bl[>=50K & <100K] -0.155 -0.311 -0.242 -0.210 -0.018
## overall.income.bl[<50k] -0.151 -0.432 -0.451 -0.464 -0.041
## overall.income.bl[Don't Know or Refuse] -0.116 -0.324 -0.359 -0.377 -0.090
## sex.blFemale 0.007 0.009 0.008 0.000 0.017
## prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther 0.139
## neighb_phenx_avg_p.bl.cm 0.036 -0.014
## overall.income.bl[>=50K & <100K] -0.009 -0.040 0.051
## overall.income.bl[<50k] -0.050 -0.115 0.124 0.528
## overall.income.bl[Don't Know or Refuse] -0.053 -0.098 0.072 0.433 0.650
## sex.blFemale 0.036 0.010 0.021 -0.018 -0.008
## o..KoR
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther
## neighb_phenx_avg_p.bl.cm
## overall.income.bl[>=50K & <100K]
## overall.income.bl[<50k]
## overall.income.bl[Don't Know or Refuse]
## sex.blFemale -0.018
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.8033304 -0.2751430 -0.1807364 0.1612190 39.7857867
##
## Number of Observations: 23857
## Number of Groups: 21
anova(attention_zinb_r)
## numDF denDF F-value p-value
## (Intercept) 1 14511 301.71089 <.0001
## interview_age.c9.y 1 14511 58.55577 <.0001
## race_ethnicity.bl 3 9309 4.71680 0.0027
## high.educ.bl 4 9309 21.82550 <.0001
## prnt.empl.bl 3 9309 11.49861 <.0001
## neighb_phenx_avg_p.bl.cm 1 9309 62.26850 <.0001
## overall.income.bl 3 9309 3.97771 0.0076
## sex.bl 1 9309 277.87000 <.0001
#Check outlier/residuals with this df
attention_res <- df_cc
attention_res$level1_resid.raw <- residuals(attention_zinb_r)
attention_res$level1_resid.pearson <- residuals(attention_zinb_r, type="pearson")
#Add predicted values (Yhat)
attention_res$cbcl_scr_syn_attention_r_predicted <- predict(attention_zinb_r,attention_res,type="response")
#Incidence
attention_res$incidence <- estimate.probability(attention_res$cbcl_scr_syn_attention_r, method="empirical")
#Plotting histogram of residuals, but may be skewed since using ZINB, so make sure to check below plots
hist(attention_res$level1_resid.pearson)
### Incidence vs. X’s Plots
#age
ggplot(attention_res,aes(incidence,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
### Residuals vs Y (CBCL Outcome) Plot
plot(attention_res$level1_resid.pearson, attention_res$cbcl_scr_syn_attention_r)
### Residuals vs Yhat Plot
plot(attention_res$level1_resid.pearson, attention_res$cbcl_scr_syn_attention_r_predicted)
### Residuals vs Row Plot
plot(as.numeric(rownames(attention_res)),attention_res$level1_resid.pearson)
### Residuals vs X’s Plots
#age
ggplot(attention_res,aes(level1_resid.pearson,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
rulebreak_zinb_r <- glmm.zinb(cbcl_scr_syn_rulebreak_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, random = ~1|abcd_site/subjectid,
zi_fixed = ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, zi_random = ~1|abcd_site, data = df_cc)
## Computational iterations: 15
## Computational time: 2.045 minutes
summary(rulebreak_zinb_r)
## Linear mixed-effects model fit by maximum likelihood
## Data: df_cc
## AIC BIC logLik
## NA NA NA
##
## Random effects:
## Formula: ~1 | abcd_site
## (Intercept)
## StdDev: 0.1243575
##
## Formula: ~1 | subjectid %in% abcd_site
## (Intercept) Residual
## StdDev: 1.193925 0.791083
##
## Variance function:
## Structure: fixed weights
## Formula: ~invwt
## Fixed effects: cbcl_scr_syn_rulebreak_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl
## Value Std.Error DF t-value
## (Intercept) -0.5465811 0.04542784 14511 -12.031853
## interview_age.c9.y -0.0317480 0.00637374 14511 -4.981059
## race_ethnicity.blHispanic -0.0764440 0.04559273 9309 -1.676670
## race_ethnicity.blBlack 0.0895467 0.05001752 9309 1.790307
## race_ethnicity.blOther 0.0799822 0.04670067 9309 1.712657
## high.educ.blBachelor 0.1805187 0.03983589 9309 4.531561
## high.educ.blSome College 0.3288013 0.04485652 9309 7.330066
## high.educ.blHS Diploma/GED 0.1888618 0.06238413 9309 3.027401
## high.educ.bl< HS Diploma 0.2258188 0.08010467 9309 2.819046
## prnt.empl.blStay at Home Parent -0.0333214 0.04008685 9309 -0.831230
## prnt.empl.blUnemployed 0.1785418 0.06336153 9309 2.817827
## prnt.empl.blOther 0.2555895 0.05601209 9309 4.563112
## neighb_phenx_avg_p.bl.cm -0.1183948 0.01635967 9309 -7.236994
## overall.income.bl[>=50K & <100K] 0.1480522 0.04016668 9309 3.685946
## overall.income.bl[<50k] 0.3286485 0.04956715 9309 6.630370
## overall.income.bl[Don't Know or Refuse] 0.2581262 0.06198526 9309 4.164316
## sex.blFemale -0.4248865 0.02923940 9309 -14.531300
## p-value
## (Intercept) 0.0000
## interview_age.c9.y 0.0000
## race_ethnicity.blHispanic 0.0936
## race_ethnicity.blBlack 0.0734
## race_ethnicity.blOther 0.0868
## high.educ.blBachelor 0.0000
## high.educ.blSome College 0.0000
## high.educ.blHS Diploma/GED 0.0025
## high.educ.bl< HS Diploma 0.0048
## prnt.empl.blStay at Home Parent 0.4059
## prnt.empl.blUnemployed 0.0048
## prnt.empl.blOther 0.0000
## neighb_phenx_avg_p.bl.cm 0.0000
## overall.income.bl[>=50K & <100K] 0.0002
## overall.income.bl[<50k] 0.0000
## overall.income.bl[Don't Know or Refuse] 0.0000
## sex.blFemale 0.0000
## Correlation:
## (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y -0.248
## race_ethnicity.blHispanic -0.169 0.008
## race_ethnicity.blBlack -0.152 0.012 0.361
## race_ethnicity.blOther -0.197 0.005 0.288 0.270
## high.educ.blBachelor -0.308 -0.002 -0.020 -0.011 0.001
## high.educ.blSome College -0.216 0.003 -0.107 -0.080 -0.021
## high.educ.blHS Diploma/GED -0.136 0.008 -0.144 -0.148 -0.009
## high.educ.bl< HS Diploma -0.089 0.001 -0.173 -0.079 -0.013
## prnt.empl.blStay at Home Parent -0.146 0.011 0.041 0.093 0.016
## prnt.empl.blUnemployed -0.055 0.003 0.010 -0.040 0.010
## prnt.empl.blOther -0.065 0.005 0.047 0.015 -0.012
## neighb_phenx_avg_p.bl.cm -0.149 -0.004 0.052 0.157 0.054
## overall.income.bl[>=50K & <100K] -0.241 -0.002 -0.094 -0.068 -0.016
## overall.income.bl[<50k] -0.153 -0.001 -0.152 -0.190 -0.082
## overall.income.bl[Don't Know or Refuse] -0.136 -0.001 -0.106 -0.131 -0.059
## sex.blFemale -0.290 0.007 -0.009 -0.020 -0.016
## hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College 0.473
## high.educ.blHS Diploma/GED 0.351 0.516
## high.educ.bl< HS Diploma 0.279 0.424 0.389
## prnt.empl.blStay at Home Parent -0.030 -0.017 -0.051 -0.099
## prnt.empl.blUnemployed -0.008 -0.008 -0.068 -0.100 0.152
## prnt.empl.blOther -0.013 -0.034 -0.016 -0.023 0.162
## neighb_phenx_avg_p.bl.cm -0.005 0.061 0.055 0.054 0.032
## overall.income.bl[>=50K & <100K] -0.171 -0.275 -0.175 -0.117 -0.028
## overall.income.bl[<50k] -0.161 -0.420 -0.368 -0.312 -0.051
## overall.income.bl[Don't Know or Refuse] -0.101 -0.259 -0.245 -0.223 -0.071
## sex.blFemale 0.011 0.021 0.014 -0.003 -0.007
## prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther 0.138
## neighb_phenx_avg_p.bl.cm 0.025 0.004
## overall.income.bl[>=50K & <100K] -0.015 -0.049 0.082
## overall.income.bl[<50k] -0.102 -0.140 0.157 0.519
## overall.income.bl[Don't Know or Refuse] -0.083 -0.101 0.087 0.375 0.502
## sex.blFemale 0.014 0.017 0.032 -0.010 -0.011
## o..KoR
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther
## neighb_phenx_avg_p.bl.cm
## overall.income.bl[>=50K & <100K]
## overall.income.bl[<50k]
## overall.income.bl[Don't Know or Refuse]
## sex.blFemale 0.004
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.9397639 -0.6403547 -0.4930191 0.3938430 3.9186439
##
## Number of Observations: 23857
## Number of Groups:
## abcd_site subjectid %in% abcd_site
## 21 9345
summary(rulebreak_zinb_r$zi.fit)
## Linear mixed-effects model fit by maximum likelihood
## Data: data
## AIC BIC logLik
## NA NA NA
##
## Random effects:
## Formula: ~1 | abcd_site
## (Intercept) Residual
## StdDev: 0.4582038 0.2642232
##
## Variance function:
## Structure: fixed weights
## Formula: ~invwt
## Fixed effects: zp ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl
## Value Std.Error DF t-value
## (Intercept) -4.740349 0.10647654 23820 -44.52013
## interview_age.c9.y 0.252951 0.01080520 23820 23.41014
## race_ethnicity.blHispanic 0.213824 0.03460635 23820 6.17876
## race_ethnicity.blBlack -0.187430 0.04798726 23820 -3.90583
## race_ethnicity.blOther -0.037442 0.03567213 23820 -1.04961
## high.educ.blBachelor -0.255060 0.02776595 23820 -9.18606
## high.educ.blSome College -0.656245 0.03794397 23820 -17.29510
## high.educ.blHS Diploma/GED -0.140740 0.04999219 23820 -2.81525
## high.educ.bl< HS Diploma 0.021875 0.05839111 23820 0.37463
## prnt.empl.blStay at Home Parent -0.010609 0.02998149 23820 -0.35384
## prnt.empl.blUnemployed -0.145182 0.05869044 23820 -2.47370
## prnt.empl.blOther -0.051310 0.04952225 23820 -1.03610
## neighb_phenx_avg_p.bl.cm 0.383487 0.01454822 23820 26.35971
## overall.income.bl[>=50K & <100K] -0.276262 0.03061828 23820 -9.02278
## overall.income.bl[<50k] -0.015216 0.03935825 23820 -0.38659
## overall.income.bl[Don't Know or Refuse] 0.400206 0.04216651 23820 9.49108
## sex.blFemale 1.206686 0.02455250 23820 49.14717
## p-value
## (Intercept) 0.0000
## interview_age.c9.y 0.0000
## race_ethnicity.blHispanic 0.0000
## race_ethnicity.blBlack 0.0001
## race_ethnicity.blOther 0.2939
## high.educ.blBachelor 0.0000
## high.educ.blSome College 0.0000
## high.educ.blHS Diploma/GED 0.0049
## high.educ.bl< HS Diploma 0.7079
## prnt.empl.blStay at Home Parent 0.7235
## prnt.empl.blUnemployed 0.0134
## prnt.empl.blOther 0.3002
## neighb_phenx_avg_p.bl.cm 0.0000
## overall.income.bl[>=50K & <100K] 0.0000
## overall.income.bl[<50k] 0.6991
## overall.income.bl[Don't Know or Refuse] 0.0000
## sex.blFemale 0.0000
## Correlation:
## (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y -0.211
## race_ethnicity.blHispanic -0.060 0.016
## race_ethnicity.blBlack -0.050 0.023 0.283
## race_ethnicity.blOther -0.062 0.002 0.288 0.178
## high.educ.blBachelor -0.073 -0.013 -0.003 -0.005 0.011
## high.educ.blSome College -0.039 -0.008 -0.127 -0.084 0.000
## high.educ.blHS Diploma/GED -0.025 0.013 -0.183 -0.136 0.002
## high.educ.bl< HS Diploma -0.015 0.004 -0.204 -0.083 0.010
## prnt.empl.blStay at Home Parent -0.049 0.018 0.048 0.075 0.012
## prnt.empl.blUnemployed -0.022 0.006 0.001 -0.039 0.004
## prnt.empl.blOther -0.018 0.004 0.031 -0.015 -0.026
## neighb_phenx_avg_p.bl.cm -0.076 0.002 0.028 0.098 0.023
## overall.income.bl[>=50K & <100K] -0.061 0.007 -0.089 -0.057 -0.016
## overall.income.bl[<50k] -0.041 0.002 -0.132 -0.147 -0.066
## overall.income.bl[Don't Know or Refuse] -0.042 0.006 -0.097 -0.099 -0.068
## sex.blFemale -0.173 0.019 -0.003 -0.012 -0.015
## hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College 0.342
## high.educ.blHS Diploma/GED 0.271 0.405
## high.educ.bl< HS Diploma 0.239 0.380 0.405
## prnt.empl.blStay at Home Parent -0.043 -0.029 -0.074 -0.129
## prnt.empl.blUnemployed -0.017 -0.015 -0.056 -0.100 0.133
## prnt.empl.blOther -0.029 -0.025 0.005 -0.028 0.139
## neighb_phenx_avg_p.bl.cm -0.016 0.037 0.041 0.063 0.022
## overall.income.bl[>=50K & <100K] -0.143 -0.220 -0.134 -0.091 -0.015
## overall.income.bl[<50k] -0.146 -0.381 -0.393 -0.359 -0.019
## overall.income.bl[Don't Know or Refuse] -0.094 -0.227 -0.263 -0.244 -0.083
## sex.blFemale 0.004 0.004 -0.002 -0.011 0.001
## prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther 0.100
## neighb_phenx_avg_p.bl.cm 0.028 0.002
## overall.income.bl[>=50K & <100K] -0.009 -0.052 0.090
## overall.income.bl[<50k] -0.070 -0.133 0.120 0.384
## overall.income.bl[Don't Know or Refuse] -0.060 -0.099 0.075 0.301 0.456
## sex.blFemale 0.027 0.010 0.033 -0.015 -0.004
## o..KoR
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther
## neighb_phenx_avg_p.bl.cm
## overall.income.bl[>=50K & <100K]
## overall.income.bl[<50k]
## overall.income.bl[Don't Know or Refuse]
## sex.blFemale 0.015
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.0022981 -0.4345395 0.1482323 0.2568752 45.6356384
##
## Number of Observations: 23857
## Number of Groups: 21
anova(rulebreak_zinb_r)
## numDF denDF F-value p-value
## (Intercept) 1 14511 192.14711 <.0001
## interview_age.c9.y 1 14511 26.83750 <.0001
## race_ethnicity.bl 3 9309 30.95390 <.0001
## high.educ.bl 4 9309 50.04633 <.0001
## prnt.empl.bl 3 9309 16.78715 <.0001
## neighb_phenx_avg_p.bl.cm 1 9309 62.56278 <.0001
## overall.income.bl 3 9309 14.39912 <.0001
## sex.bl 1 9309 211.15868 <.0001
#Check outlier/residuals with this df
rulebreak_res <- df_cc
rulebreak_res$level1_resid.raw <- residuals(rulebreak_zinb_r)
rulebreak_res$level1_resid.pearson <- residuals(rulebreak_zinb_r, type="pearson")
#Add predicted values (Yhat)
rulebreak_res$cbcl_scr_syn_rulebreak_r_predicted <- predict(rulebreak_zinb_r,rulebreak_res,type="response")
#Incidence
rulebreak_res$incidence <- estimate.probability(rulebreak_res$cbcl_scr_syn_rulebreak_r, method="empirical")
#Plotting histogram of residuals, but may be skewed since using ZINB, so make sure to check below plots
hist(rulebreak_res$level1_resid.pearson)
### Incidence vs. X’s Plots
#age
ggplot(rulebreak_res,aes(incidence,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 7.303e-05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 1.3261e-14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 1.3333e-09
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at 0
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 7.303e-05
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 1.3261e-14
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 1.3333e-09
### Residuals vs Y (CBCL Outcome) Plot
plot(rulebreak_res$level1_resid.pearson, rulebreak_res$cbcl_scr_syn_rulebreak_r)
### Residuals vs Yhat Plot
plot(rulebreak_res$level1_resid.pearson, rulebreak_res$cbcl_scr_syn_rulebreak_r_predicted)
### Residuals vs Row Plot
plot(as.numeric(rownames(rulebreak_res)),rulebreak_res$level1_resid.pearson)
### Residuals vs X’s Plots
#age
ggplot(rulebreak_res,aes(level1_resid.pearson,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
aggressive_zinb_r <- glmm.zinb(cbcl_scr_syn_aggressive_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, random = ~1|abcd_site/subjectid,
zi_fixed = ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, zi_random = ~1|abcd_site, data = df_cc)
## Computational iterations: 11
## Computational time: 1.444 minutes
summary(aggressive_zinb_r)
## Linear mixed-effects model fit by maximum likelihood
## Data: df_cc
## AIC BIC logLik
## NA NA NA
##
## Random effects:
## Formula: ~1 | abcd_site
## (Intercept)
## StdDev: 0.1324439
##
## Formula: ~1 | subjectid %in% abcd_site
## (Intercept) Residual
## StdDev: 1.14606 0.9764887
##
## Variance function:
## Structure: fixed weights
## Formula: ~invwt
## Fixed effects: cbcl_scr_syn_aggressive_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl
## Value Std.Error DF t-value
## (Intercept) 0.6339320 0.04339081 14511 14.609821
## interview_age.c9.y -0.0440928 0.00491956 14511 -8.962753
## race_ethnicity.blHispanic -0.0618377 0.04166748 9309 -1.484075
## race_ethnicity.blBlack -0.1962362 0.04651336 9309 -4.218922
## race_ethnicity.blOther -0.0882120 0.04277408 9309 -2.062276
## high.educ.blBachelor 0.0878093 0.03580335 9309 2.452545
## high.educ.blSome College 0.1513259 0.04084311 9309 3.705052
## high.educ.blHS Diploma/GED 0.0255209 0.05735353 9309 0.444976
## high.educ.bl< HS Diploma 0.0876659 0.07376062 9309 1.188519
## prnt.empl.blStay at Home Parent 0.0249738 0.03624331 9309 0.689060
## prnt.empl.blUnemployed 0.2126384 0.05879142 9309 3.616827
## prnt.empl.blOther 0.1821335 0.05190958 9309 3.508668
## neighb_phenx_avg_p.bl.cm -0.1234312 0.01499053 9309 -8.233944
## overall.income.bl[>=50K & <100K] 0.1213684 0.03617907 9309 3.354659
## overall.income.bl[<50k] 0.2465593 0.04523913 9309 5.450134
## overall.income.bl[Don't Know or Refuse] 0.1352556 0.05686503 9309 2.378537
## sex.blFemale -0.2496839 0.02648675 9309 -9.426747
## p-value
## (Intercept) 0.0000
## interview_age.c9.y 0.0000
## race_ethnicity.blHispanic 0.1378
## race_ethnicity.blBlack 0.0000
## race_ethnicity.blOther 0.0392
## high.educ.blBachelor 0.0142
## high.educ.blSome College 0.0002
## high.educ.blHS Diploma/GED 0.6563
## high.educ.bl< HS Diploma 0.2347
## prnt.empl.blStay at Home Parent 0.4908
## prnt.empl.blUnemployed 0.0003
## prnt.empl.blOther 0.0005
## neighb_phenx_avg_p.bl.cm 0.0000
## overall.income.bl[>=50K & <100K] 0.0008
## overall.income.bl[<50k] 0.0000
## overall.income.bl[Don't Know or Refuse] 0.0174
## sex.blFemale 0.0000
## Correlation:
## (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y -0.200
## race_ethnicity.blHispanic -0.160 0.007
## race_ethnicity.blBlack -0.140 0.010 0.349
## race_ethnicity.blOther -0.182 0.004 0.284 0.258
## high.educ.blBachelor -0.280 -0.001 -0.021 -0.015 -0.002
## high.educ.blSome College -0.193 0.003 -0.112 -0.086 -0.026
## high.educ.blHS Diploma/GED -0.120 0.007 -0.145 -0.148 -0.010
## high.educ.bl< HS Diploma -0.077 0.001 -0.173 -0.081 -0.014
## prnt.empl.blStay at Home Parent -0.139 0.009 0.042 0.092 0.018
## prnt.empl.blUnemployed -0.053 0.003 0.008 -0.042 0.011
## prnt.empl.blOther -0.061 0.003 0.040 0.010 -0.011
## neighb_phenx_avg_p.bl.cm -0.140 -0.004 0.045 0.152 0.048
## overall.income.bl[>=50K & <100K] -0.223 -0.001 -0.090 -0.062 -0.011
## overall.income.bl[<50k] -0.141 -0.001 -0.147 -0.185 -0.081
## overall.income.bl[Don't Know or Refuse] -0.124 -0.001 -0.100 -0.126 -0.056
## sex.blFemale -0.288 0.005 -0.009 -0.019 -0.019
## hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College 0.460
## high.educ.blHS Diploma/GED 0.339 0.502
## high.educ.bl< HS Diploma 0.269 0.414 0.381
## prnt.empl.blStay at Home Parent -0.031 -0.016 -0.050 -0.096
## prnt.empl.blUnemployed -0.010 -0.011 -0.069 -0.099 0.149
## prnt.empl.blOther -0.014 -0.033 -0.014 -0.020 0.159
## neighb_phenx_avg_p.bl.cm -0.007 0.061 0.056 0.053 0.029
## overall.income.bl[>=50K & <100K] -0.176 -0.276 -0.174 -0.117 -0.028
## overall.income.bl[<50k] -0.161 -0.418 -0.367 -0.312 -0.053
## overall.income.bl[Don't Know or Refuse] -0.101 -0.254 -0.241 -0.221 -0.076
## sex.blFemale 0.014 0.021 0.014 -0.005 -0.006
## prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther 0.134
## neighb_phenx_avg_p.bl.cm 0.024 0.004
## overall.income.bl[>=50K & <100K] -0.014 -0.049 0.083
## overall.income.bl[<50k] -0.101 -0.138 0.157 0.507
## overall.income.bl[Don't Know or Refuse] -0.078 -0.098 0.087 0.362 0.488
## sex.blFemale 0.019 0.017 0.027 -0.005 -0.006
## o..KoR
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther
## neighb_phenx_avg_p.bl.cm
## overall.income.bl[>=50K & <100K]
## overall.income.bl[<50k]
## overall.income.bl[Don't Know or Refuse]
## sex.blFemale 0.008
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.4103429 -0.7122797 -0.3411881 0.4042947 4.0404707
##
## Number of Observations: 23857
## Number of Groups:
## abcd_site subjectid %in% abcd_site
## 21 9345
summary(aggressive_zinb_r$zi.fit)
## Linear mixed-effects model fit by maximum likelihood
## Data: data
## AIC BIC logLik
## NA NA NA
##
## Random effects:
## Formula: ~1 | abcd_site
## (Intercept) Residual
## StdDev: 0.2788267 0.4684907
##
## Variance function:
## Structure: fixed weights
## Formula: ~invwt
## Fixed effects: zp ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl
## Value Std.Error DF t-value
## (Intercept) -4.335767 0.08501125 23820 -51.00227
## interview_age.c9.y 0.245121 0.01808395 23820 13.55460
## race_ethnicity.blHispanic 0.379047 0.05914867 23820 6.40837
## race_ethnicity.blBlack 0.742765 0.06116242 23820 12.14415
## race_ethnicity.blOther 0.474197 0.05661694 23820 8.37553
## high.educ.blBachelor 0.133514 0.04983392 23820 2.67917
## high.educ.blSome College 0.230652 0.05826166 23820 3.95889
## high.educ.blHS Diploma/GED 0.375390 0.08003361 23820 4.69040
## high.educ.bl< HS Diploma 1.003641 0.08724317 23820 11.50395
## prnt.empl.blStay at Home Parent -0.019367 0.05161527 23820 -0.37521
## prnt.empl.blUnemployed 0.223883 0.07382609 23820 3.03258
## prnt.empl.blOther -0.156931 0.07930262 23820 -1.97888
## neighb_phenx_avg_p.bl.cm 0.259068 0.02210036 23820 11.72235
## overall.income.bl[>=50K & <100K] -0.293459 0.05194948 23820 -5.64894
## overall.income.bl[<50k] -0.518706 0.06518059 23820 -7.95799
## overall.income.bl[Don't Know or Refuse] 0.050936 0.07159839 23820 0.71141
## sex.blFemale 0.203112 0.03649427 23820 5.56559
## p-value
## (Intercept) 0.0000
## interview_age.c9.y 0.0000
## race_ethnicity.blHispanic 0.0000
## race_ethnicity.blBlack 0.0000
## race_ethnicity.blOther 0.0000
## high.educ.blBachelor 0.0074
## high.educ.blSome College 0.0001
## high.educ.blHS Diploma/GED 0.0000
## high.educ.bl< HS Diploma 0.0000
## prnt.empl.blStay at Home Parent 0.7075
## prnt.empl.blUnemployed 0.0024
## prnt.empl.blOther 0.0478
## neighb_phenx_avg_p.bl.cm 0.0000
## overall.income.bl[>=50K & <100K] 0.0000
## overall.income.bl[<50k] 0.0000
## overall.income.bl[Don't Know or Refuse] 0.4768
## sex.blFemale 0.0000
## Correlation:
## (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y -0.444
## race_ethnicity.blHispanic -0.158 0.017
## race_ethnicity.blBlack -0.150 0.035 0.404
## race_ethnicity.blOther -0.177 0.005 0.340 0.307
## high.educ.blBachelor -0.219 -0.004 -0.004 -0.001 0.019
## high.educ.blSome College -0.153 0.007 -0.115 -0.092 -0.003
## high.educ.blHS Diploma/GED -0.105 0.023 -0.143 -0.146 0.005
## high.educ.bl< HS Diploma -0.085 0.016 -0.173 -0.094 0.001
## prnt.empl.blStay at Home Parent -0.111 0.021 0.050 0.102 0.021
## prnt.empl.blUnemployed -0.051 0.010 0.018 -0.028 0.014
## prnt.empl.blOther -0.039 0.011 0.043 0.002 -0.020
## neighb_phenx_avg_p.bl.cm -0.139 0.002 0.029 0.140 0.034
## overall.income.bl[>=50K & <100K] -0.129 0.000 -0.101 -0.090 -0.024
## overall.income.bl[<50k] -0.078 -0.002 -0.138 -0.206 -0.077
## overall.income.bl[Don't Know or Refuse] -0.081 0.013 -0.111 -0.147 -0.063
## sex.blFemale -0.235 0.015 -0.004 -0.023 -0.012
## hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College 0.452
## high.educ.blHS Diploma/GED 0.340 0.507
## high.educ.bl< HS Diploma 0.317 0.491 0.475
## prnt.empl.blStay at Home Parent -0.030 -0.018 -0.051 -0.113
## prnt.empl.blUnemployed -0.015 -0.009 -0.079 -0.129 0.175
## prnt.empl.blOther -0.020 -0.030 -0.011 -0.031 0.145
## neighb_phenx_avg_p.bl.cm -0.001 0.054 0.048 0.067 0.028
## overall.income.bl[>=50K & <100K] -0.154 -0.292 -0.188 -0.149 -0.022
## overall.income.bl[<50k] -0.138 -0.397 -0.379 -0.390 -0.047
## overall.income.bl[Don't Know or Refuse] -0.105 -0.291 -0.293 -0.314 -0.091
## sex.blFemale 0.012 0.017 0.011 -0.004 0.004
## prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther 0.141
## neighb_phenx_avg_p.bl.cm 0.035 -0.007
## overall.income.bl[>=50K & <100K] -0.017 -0.046 0.077
## overall.income.bl[<50k] -0.097 -0.124 0.137 0.474
## overall.income.bl[Don't Know or Refuse] -0.098 -0.113 0.093 0.390 0.554
## sex.blFemale 0.035 0.011 0.024 0.001 0.000
## o..KoR
## interview_age.c9.y
## race_ethnicity.blHispanic
## race_ethnicity.blBlack
## race_ethnicity.blOther
## high.educ.blBachelor
## high.educ.blSome College
## high.educ.blHS Diploma/GED
## high.educ.bl< HS Diploma
## prnt.empl.blStay at Home Parent
## prnt.empl.blUnemployed
## prnt.empl.blOther
## neighb_phenx_avg_p.bl.cm
## overall.income.bl[>=50K & <100K]
## overall.income.bl[<50k]
## overall.income.bl[Don't Know or Refuse]
## sex.blFemale 0.001
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.1537437 -0.3647711 -0.2745072 0.2188908 19.0696631
##
## Number of Observations: 23857
## Number of Groups: 21
anova(aggressive_zinb_r)
## numDF denDF F-value p-value
## (Intercept) 1 14511 354.3314 <.0001
## interview_age.c9.y 1 14511 81.9047 <.0001
## race_ethnicity.bl 3 9309 2.5172 0.0563
## high.educ.bl 4 9309 21.7835 <.0001
## prnt.empl.bl 3 9309 13.0736 <.0001
## neighb_phenx_avg_p.bl.cm 1 9309 79.9245 <.0001
## overall.income.bl 3 9309 9.8816 <.0001
## sex.bl 1 9309 88.8636 <.0001
#Check outlier/residuals with this df
aggressive_res <- df_cc
aggressive_res$level1_resid.raw <- residuals(aggressive_zinb_r)
aggressive_res$level1_resid.pearson <- residuals(aggressive_zinb_r, type="pearson")
#Add predicted values (Yhat)
aggressive_res$cbcl_scr_syn_aggressive_r_predicted <- predict(aggressive_zinb_r,aggressive_res,type="response")
#Incidence
aggressive_res$incidence <- estimate.probability(aggressive_res$cbcl_scr_syn_aggressive_r, method="empirical")
#Plotting histogram of residuals, but may be skewed since using ZINB, so make sure to check below plots
hist(aggressive_res$level1_resid.pearson)
### Incidence vs. X’s Plots
#age
ggplot(aggressive_res,aes(incidence,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
### Residuals vs Y (CBCL Outcome) Plot
plot(aggressive_res$level1_resid.pearson, aggressive_res$cbcl_scr_syn_aggressive_r)
### Residuals vs Yhat Plot
plot(aggressive_res$level1_resid.pearson, aggressive_res$cbcl_scr_syn_aggressive_r_predicted)
### Residuals vs Row Plot
plot(as.numeric(rownames(aggressive_res)),aggressive_res$level1_resid.pearson)
### Residuals vs X’s Plots
#age
ggplot(aggressive_res,aes(level1_resid.pearson,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
#take snapshot for environment dependencies
renv::snapshot()
#update if running again
renv::update()